diff --git a/.devcontainer/Dockerfile b/.devcontainer/Dockerfile index 33398f53b..f7979e7f7 100644 --- a/.devcontainer/Dockerfile +++ b/.devcontainer/Dockerfile @@ -1,6 +1,6 @@ # See here for image contents: https://github.com/microsoft/vscode-dev-containers/tree/v0.233.0/containers/python-3/.devcontainer/base.Dockerfile -# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3.9, 3.8, 3.7, 3.6, 3-bullseye, 3.10-bullseye, 3.9-bullseye, 3.8-bullseye, 3.7-bullseye, 3.6-bullseye, 3-buster, 3.10-buster, 3.9-buster, 3.8-buster, 3.7-buster, 3.6-buster +# [Choice] Python version (use -bullseye variants on local arm64/Apple Silicon): 3, 3.10, 3-bullseye, 3.10-bullseye, 3-buster, 3.10-buster, etc. ARG VARIANT="3.10-bullseye" FROM mcr.microsoft.com/vscode/devcontainers/python:0-${VARIANT} diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index c8444ed39..cbecb5603 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -6,7 +6,7 @@ "dockerfile": "Dockerfile", "context": "..", "args": { - // Update 'VARIANT' to pick a Python version: 3, 3.10, 3.9, 3.8, 3.7, 3.6 + // Update 'VARIANT' to pick a Python version: 3, 3.10, etc. // Append -bullseye or -buster to pin to an OS version. // Use -bullseye variants on local on arm64/Apple Silicon. 
"VARIANT": "3.10", diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 0084f0993..0935182f9 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -21,26 +21,25 @@ jobs: # set to false for debugging fail-fast: true matrix: - # using Python 3.8 to support running across multiple operating systems including Windows 7 include: - os: ubuntu-20.04 # use old linux so that the shared library versioning is more portable artifact_name: capa asset_name: linux - python_version: 3.8 + python_version: '3.10' - os: ubuntu-20.04 artifact_name: capa asset_name: linux-py312 - python_version: 3.12 + python_version: '3.12' - os: windows-2019 artifact_name: capa.exe asset_name: windows - python_version: 3.8 + python_version: '3.10' - os: macos-12 # use older macOS for assumed better portability artifact_name: capa asset_name: macos - python_version: 3.8 + python_version: '3.10' steps: - name: Checkout capa uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 @@ -107,7 +106,7 @@ jobs: # upload zipped binaries to Release page if: github.event_name == 'release' name: zip and upload ${{ matrix.asset_name }} - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest needs: [build] strategy: matrix: diff --git a/.github/workflows/changelog.yml b/.github/workflows/changelog.yml index 86e9026b5..8bf5d67cc 100644 --- a/.github/workflows/changelog.yml +++ b/.github/workflows/changelog.yml @@ -14,7 +14,7 @@ jobs: check_changelog: # no need to check for dependency updates via dependabot if: github.actor != 'dependabot[bot]' && github.actor != 'dependabot-preview[bot]' - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest env: NO_CHANGELOG: '[x] No CHANGELOG update needed' steps: diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 4a591d778..f54a3cb26 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -21,7 +21,7 @@ jobs: - name: Set up Python uses: 
actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: '3.8' + python-version: '3.10' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/tag.yml b/.github/workflows/tag.yml index 34eabbedc..69151150e 100644 --- a/.github/workflows/tag.yml +++ b/.github/workflows/tag.yml @@ -9,7 +9,7 @@ permissions: read-all jobs: tag: name: Tag capa rules - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - name: Checkout capa-rules uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 9aa826ef0..bbb39ff7b 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -26,7 +26,7 @@ env: jobs: changelog_format: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout capa uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 @@ -37,15 +37,15 @@ jobs: if [ $number != 1 ]; then exit 1; fi code_style: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout capa uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 # use latest available python to take advantage of best performance - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: "3.11" + python-version: "3.12" - name: Install dependencies run: | pip install -r requirements.txt @@ -64,16 +64,16 @@ jobs: run: pre-commit run deptry --hook-stage manual rule_linter: - runs-on: ubuntu-20.04 + runs-on: ubuntu-22.04 steps: - name: Checkout capa with submodules uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: submodules: recursive - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@0a5c61591373683505ea898e09a3ea4f39ef2b9c # v5.0.0 with: - python-version: "3.11" + python-version: "3.12" - name: Install 
capa run: | pip install -r requirements.txt @@ -90,15 +90,15 @@ jobs: matrix: os: [ubuntu-20.04, windows-2019, macos-12] # across all operating systems - python-version: ["3.8", "3.11"] + python-version: ["3.10", "3.11"] include: # on Ubuntu run these as well - os: ubuntu-20.04 - python-version: "3.8" + python-version: "3.10" - os: ubuntu-20.04 - python-version: "3.9" + python-version: "3.11" - os: ubuntu-20.04 - python-version: "3.10" + python-version: "3.12" steps: - name: Checkout capa with submodules uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 @@ -131,7 +131,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9", "3.11"] + python-version: ["3.10", "3.11"] steps: - name: Checkout capa with submodules # do only run if BN_SERIAL is available, have to do this in every step, see https://github.com/orgs/community/discussions/26726#discussioncomment-3253118 @@ -173,7 +173,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.11"] + python-version: ["3.10", "3.11"] java-version: ["17"] ghidra-version: ["11.0.1"] public-version: ["PUBLIC_20240130"] # for ghidra releases diff --git a/CHANGELOG.md b/CHANGELOG.md index 54fcd9e1f..14841b60d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ ### Breaking Changes +- remove support for Python 3.8 and use Python 3.10 as minimum now #1966 @mr-tz + ### New Rules (0) - diff --git a/capa/capabilities/common.py b/capa/capabilities/common.py index a73f40afe..e9b6f253d 100644 --- a/capa/capabilities/common.py +++ b/capa/capabilities/common.py @@ -9,7 +9,7 @@ import logging import itertools import collections -from typing import Any, Tuple +from typing import Any from capa.rules import Scope, RuleSet from capa.engine import FeatureSet, MatchResults @@ -64,7 +64,7 @@ def has_file_limitation(rules: RuleSet, capabilities: MatchResults, is_standalon def find_capabilities( ruleset: RuleSet, extractor: FeatureExtractor, disable_progress=None, **kwargs -) -> 
Tuple[MatchResults, Any]: +) -> tuple[MatchResults, Any]: from capa.capabilities.static import find_static_capabilities from capa.capabilities.dynamic import find_dynamic_capabilities diff --git a/capa/capabilities/dynamic.py b/capa/capabilities/dynamic.py index 2a433be4e..5eced84d3 100644 --- a/capa/capabilities/dynamic.py +++ b/capa/capabilities/dynamic.py @@ -9,7 +9,7 @@ import logging import itertools import collections -from typing import Any, List, Tuple +from typing import Any import capa.perf import capa.features.freeze as frz @@ -24,7 +24,7 @@ def find_call_capabilities( ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle -) -> Tuple[FeatureSet, MatchResults]: +) -> tuple[FeatureSet, MatchResults]: """ find matches for the given rules for the given call. @@ -51,7 +51,7 @@ def find_call_capabilities( def find_thread_capabilities( ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle, th: ThreadHandle -) -> Tuple[FeatureSet, MatchResults, MatchResults]: +) -> tuple[FeatureSet, MatchResults, MatchResults]: """ find matches for the given rules within the given thread. @@ -89,7 +89,7 @@ def find_thread_capabilities( def find_process_capabilities( ruleset: RuleSet, extractor: DynamicFeatureExtractor, ph: ProcessHandle -) -> Tuple[MatchResults, MatchResults, MatchResults, int]: +) -> tuple[MatchResults, MatchResults, MatchResults, int]: """ find matches for the given rules within the given process. 
@@ -127,7 +127,7 @@ def find_process_capabilities( def find_dynamic_capabilities( ruleset: RuleSet, extractor: DynamicFeatureExtractor, disable_progress=None -) -> Tuple[MatchResults, Any]: +) -> tuple[MatchResults, Any]: all_process_matches: MatchResults = collections.defaultdict(list) all_thread_matches: MatchResults = collections.defaultdict(list) all_call_matches: MatchResults = collections.defaultdict(list) @@ -135,7 +135,7 @@ def find_dynamic_capabilities( feature_counts = rdoc.DynamicFeatureCounts(file=0, processes=()) assert isinstance(extractor, DynamicFeatureExtractor) - processes: List[ProcessHandle] = list(extractor.get_processes()) + processes: list[ProcessHandle] = list(extractor.get_processes()) n_processes: int = len(processes) with capa.helpers.CapaProgressBar( diff --git a/capa/capabilities/static.py b/capa/capabilities/static.py index aeb710ae3..df8cd7e78 100644 --- a/capa/capabilities/static.py +++ b/capa/capabilities/static.py @@ -10,7 +10,7 @@ import logging import itertools import collections -from typing import Any, List, Tuple +from typing import Any import capa.perf import capa.helpers @@ -26,7 +26,7 @@ def find_instruction_capabilities( ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle, insn: InsnHandle -) -> Tuple[FeatureSet, MatchResults]: +) -> tuple[FeatureSet, MatchResults]: """ find matches for the given rules for the given instruction. @@ -53,7 +53,7 @@ def find_instruction_capabilities( def find_basic_block_capabilities( ruleset: RuleSet, extractor: StaticFeatureExtractor, f: FunctionHandle, bb: BBHandle -) -> Tuple[FeatureSet, MatchResults, MatchResults]: +) -> tuple[FeatureSet, MatchResults, MatchResults]: """ find matches for the given rules within the given basic block. 
@@ -93,7 +93,7 @@ def find_basic_block_capabilities( def find_code_capabilities( ruleset: RuleSet, extractor: StaticFeatureExtractor, fh: FunctionHandle -) -> Tuple[MatchResults, MatchResults, MatchResults, int]: +) -> tuple[MatchResults, MatchResults, MatchResults, int]: """ find matches for the given rules within the given function. @@ -131,16 +131,16 @@ def find_code_capabilities( def find_static_capabilities( ruleset: RuleSet, extractor: StaticFeatureExtractor, disable_progress=None -) -> Tuple[MatchResults, Any]: +) -> tuple[MatchResults, Any]: all_function_matches: MatchResults = collections.defaultdict(list) all_bb_matches: MatchResults = collections.defaultdict(list) all_insn_matches: MatchResults = collections.defaultdict(list) feature_counts = rdoc.StaticFeatureCounts(file=0, functions=()) - library_functions: Tuple[rdoc.LibraryFunction, ...] = () + library_functions: tuple[rdoc.LibraryFunction, ...] = () assert isinstance(extractor, StaticFeatureExtractor) - functions: List[FunctionHandle] = list(extractor.get_functions()) + functions: list[FunctionHandle] = list(extractor.get_functions()) n_funcs: int = len(functions) n_libs: int = 0 percentage: float = 0 diff --git a/capa/engine.py b/capa/engine.py index 25c26cb96..ff2d642d0 100644 --- a/capa/engine.py +++ b/capa/engine.py @@ -8,7 +8,7 @@ import copy import collections -from typing import TYPE_CHECKING, Set, Dict, List, Tuple, Union, Mapping, Iterable, Iterator +from typing import TYPE_CHECKING, Union, Mapping, Iterable, Iterator import capa.perf import capa.features.common @@ -27,7 +27,7 @@ # to collect the locations of a feature, do: `features[Number(0x10)]` # # aliased here so that the type can be documented and xref'd. -FeatureSet = Dict[Feature, Set[Address]] +FeatureSet = dict[Feature, set[Address]] class Statement: @@ -94,7 +94,7 @@ class And(Statement): match if all of the children evaluate to True. 
the order of evaluation is dictated by the property - `And.children` (type: List[Statement|Feature]). + `And.children` (type: list[Statement|Feature]). a query optimizer may safely manipulate the order of these children. """ @@ -127,7 +127,7 @@ class Or(Statement): match if any of the children evaluate to True. the order of evaluation is dictated by the property - `Or.children` (type: List[Statement|Feature]). + `Or.children` (type: list[Statement|Feature]). a query optimizer may safely manipulate the order of these children. """ @@ -176,7 +176,7 @@ class Some(Statement): match if at least N of the children evaluate to True. the order of evaluation is dictated by the property - `Some.children` (type: List[Statement|Feature]). + `Some.children` (type: list[Statement|Feature]). a query optimizer may safely manipulate the order of these children. """ @@ -267,7 +267,7 @@ def evaluate(self, features: FeatureSet, short_circuit=True): # inspect(match_details) # # aliased here so that the type can be documented and xref'd. -MatchResults = Mapping[str, List[Tuple[Address, Result]]] +MatchResults = Mapping[str, list[tuple[Address, Result]]] def get_rule_namespaces(rule: "capa.rules.Rule") -> Iterator[str]: @@ -292,7 +292,7 @@ def index_rule_matches(features: FeatureSet, rule: "capa.rules.Rule", locations: features[capa.features.common.MatchedRule(namespace)].update(locations) -def match(rules: List["capa.rules.Rule"], features: FeatureSet, addr: Address) -> Tuple[FeatureSet, MatchResults]: +def match(rules: list["capa.rules.Rule"], features: FeatureSet, addr: Address) -> tuple[FeatureSet, MatchResults]: """ match the given rules against the given features, returning an updated set of features and the matches. 
diff --git a/capa/features/com/__init__.py b/capa/features/com/__init__.py index 4b4edd041..722706b20 100644 --- a/capa/features/com/__init__.py +++ b/capa/features/com/__init__.py @@ -6,7 +6,6 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. from enum import Enum -from typing import Dict, List from capa.helpers import assert_never @@ -22,7 +21,7 @@ class ComType(Enum): } -def load_com_database(com_type: ComType) -> Dict[str, List[str]]: +def load_com_database(com_type: ComType) -> dict[str, list[str]]: # lazy load these python files since they are so large. # that is, don't load them unless a COM feature is being handled. import capa.features.com.classes diff --git a/capa/features/com/classes.py b/capa/features/com/classes.py index f517821f8..d5048a404 100644 --- a/capa/features/com/classes.py +++ b/capa/features/com/classes.py @@ -5,9 +5,8 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Dict, List -COM_CLASSES: Dict[str, List[str]] = { +COM_CLASSES: dict[str, list[str]] = { "ClusAppWiz": ["24F97150-6689-11D1-9AA7-00C04FB93A80"], "ClusCfgAddNodesWizard": ["BB8D141E-C00A-469F-BC5C-ECD814F0BD74"], "ClusCfgCreateClusterWizard": ["B929818E-F5B0-44DC-8A00-1B5F5F5AA1F0"], diff --git a/capa/features/com/interfaces.py b/capa/features/com/interfaces.py index b2b9a9044..05d9049b4 100644 --- a/capa/features/com/interfaces.py +++ b/capa/features/com/interfaces.py @@ -5,9 +5,8 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Dict, List -COM_INTERFACES: Dict[str, List[str]] = { +COM_INTERFACES: dict[str, list[str]] = { "IClusterApplicationWizard": ["24F97151-6689-11D1-9AA7-00C04FB93A80"], "IWEExtendWizard97": ["97DEDE68-FC6B-11CF-B5F5-00A0C90AB505"], "IWCWizard97Callback": ["97DEDE67-FC6B-11CF-B5F5-00A0C90AB505"], @@ -16334,7 +16333,7 @@ "IRcsServiceDescription": ["416437de-e78b-44c9-990f-7ede1f2a0c91"], "IRcsServiceKindSupportedChangedEventArgs": ["f47ea244-e783-4866-b3a7-4e5ccf023070"], "IRcsServiceStatusChangedArgs": ["661ae45a-412a-460d-bdd4-dd8ea3c15583"], - "IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"], + "IRcsServiceTuple": ["ce17a39b-2e8b-41af-b5a9-5cb072cc373c"], "IRcsSubscriptionReceivedArgs": ["04eaf06d-42bc-46cc-a637-eeb3a8723fe4"], "IRcsTransport": ["fea34759-f37c-4319-8546-ec84d21d30ff"], "IRcsTransportConfiguration": ["1fccb102-2472-4bb9-9988-c1211c83e8a9"], diff --git a/capa/features/common.py b/capa/features/common.py index e3401f7c8..5820c5793 100644 --- a/capa/features/common.py +++ b/capa/features/common.py @@ -9,10 +9,9 @@ import re import abc import codecs -import typing import logging import collections -from typing import
TYPE_CHECKING, Set, Dict, List, Union, Optional +from typing import TYPE_CHECKING, Union, Optional if TYPE_CHECKING: # circular import, otherwise @@ -79,8 +78,8 @@ def __init__( self, success: bool, statement: Union["capa.engine.Statement", "Feature"], - children: List["Result"], - locations: Optional[Set[Address]] = None, + children: list["Result"], + locations: Optional[set[Address]] = None, ): super().__init__() self.success = success @@ -213,7 +212,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True): # mapping from string value to list of locations. # will unique the locations later on. - matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set) + matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set) assert isinstance(self.value, str) for feature, locations in features.items(): @@ -261,7 +260,7 @@ class _MatchedSubstring(Substring): note: this type should only ever be constructed by `Substring.evaluate()`. it is not part of the public API. """ - def __init__(self, substring: Substring, matches: Dict[str, Set[Address]]): + def __init__(self, substring: Substring, matches: dict[str, set[Address]]): """ args: substring: the substring feature that matches. @@ -305,7 +304,7 @@ def evaluate(self, features: "capa.engine.FeatureSet", short_circuit=True): # mapping from string value to list of locations. # will unique the locations later on. - matches: typing.DefaultDict[str, Set[Address]] = collections.defaultdict(set) + matches: collections.defaultdict[str, set[Address]] = collections.defaultdict(set) for feature, locations in features.items(): if not isinstance(feature, (String,)): @@ -353,7 +352,7 @@ class _MatchedRegex(Regex): note: this type should only ever be constructed by `Regex.evaluate()`. it is not part of the public API. 
""" - def __init__(self, regex: Regex, matches: Dict[str, Set[Address]]): + def __init__(self, regex: Regex, matches: dict[str, set[Address]]): """ args: regex: the regex feature that matches. diff --git a/capa/features/extractors/base_extractor.py b/capa/features/extractors/base_extractor.py index a58016bcc..17aa2c208 100644 --- a/capa/features/extractors/base_extractor.py +++ b/capa/features/extractors/base_extractor.py @@ -11,13 +11,9 @@ import dataclasses from copy import copy from types import MethodType -from typing import Any, Set, Dict, Tuple, Union, Iterator +from typing import Any, Union, Iterator, TypeAlias from dataclasses import dataclass -# TODO(williballenthin): use typing.TypeAlias directly when Python 3.9 is deprecated -# https://github.com/mandiant/capa/issues/1699 -from typing_extensions import TypeAlias - import capa.features.address from capa.features.common import Feature from capa.features.address import Address, ThreadAddress, ProcessAddress, DynamicCallAddress, AbsoluteVirtualAddress @@ -59,7 +55,7 @@ class FunctionHandle: address: Address inner: Any - ctx: Dict[str, Any] = dataclasses.field(default_factory=dict) + ctx: dict[str, Any] = dataclasses.field(default_factory=dict) @dataclass @@ -135,7 +131,7 @@ def get_sample_hashes(self) -> SampleHashes: return self._sample_hashes @abc.abstractmethod - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: """ extract features found at every scope ("global"). 
@@ -146,12 +142,12 @@ def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: print('0x%x: %s', va, feature) yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @abc.abstractmethod - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: """ extract file-scope features. @@ -162,7 +158,7 @@ def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: print('0x%x: %s', va, feature) yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @@ -211,7 +207,7 @@ def get_function_name(self, addr: Address) -> str: raise KeyError(addr) @abc.abstractmethod - def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, f: FunctionHandle) -> Iterator[tuple[Feature, Address]]: """ extract function-scope features. the arguments are opaque values previously provided by `.get_functions()`, etc. @@ -227,7 +223,7 @@ def extract_function_features(self, f: FunctionHandle) -> Iterator[Tuple[Feature f [FunctionHandle]: an opaque value previously fetched from `.get_functions()`. yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @@ -240,7 +236,7 @@ def get_basic_blocks(self, f: FunctionHandle) -> Iterator[BBHandle]: raise NotImplementedError() @abc.abstractmethod - def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """ extract basic block-scope features. the arguments are opaque values previously provided by `.get_functions()`, etc. 
@@ -258,7 +254,7 @@ def extract_basic_block_features(self, f: FunctionHandle, bb: BBHandle) -> Itera bb [BBHandle]: an opaque value previously fetched from `.get_basic_blocks()`. yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @@ -273,7 +269,7 @@ def get_instructions(self, f: FunctionHandle, bb: BBHandle) -> Iterator[InsnHand @abc.abstractmethod def extract_insn_features( self, f: FunctionHandle, bb: BBHandle, insn: InsnHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: """ extract instruction-scope features. the arguments are opaque values previously provided by `.get_functions()`, etc. @@ -293,12 +289,12 @@ def extract_insn_features( insn [InsnHandle]: an opaque value previously fetched from `.get_instructions()`. yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() -def FunctionFilter(extractor: StaticFeatureExtractor, functions: Set) -> StaticFeatureExtractor: +def FunctionFilter(extractor: StaticFeatureExtractor, functions: set) -> StaticFeatureExtractor: original_get_functions = extractor.get_functions def filtered_get_functions(self): @@ -387,7 +383,7 @@ def get_sample_hashes(self) -> SampleHashes: return self._sample_hashes @abc.abstractmethod - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: """ extract features found at every scope ("global"). 
@@ -398,12 +394,12 @@ def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: print(addr, feature) yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @abc.abstractmethod - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: """ extract file-scope features. @@ -414,7 +410,7 @@ def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: print(addr, feature) yields: - Tuple[Feature, Address]: feature and its location + tuple[Feature, Address]: feature and its location """ raise NotImplementedError() @@ -426,7 +422,7 @@ def get_processes(self) -> Iterator[ProcessHandle]: raise NotImplementedError() @abc.abstractmethod - def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: """ Yields all the features of a process. These include: - file features of the process' image @@ -449,7 +445,7 @@ def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: raise NotImplementedError() @abc.abstractmethod - def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]: """ Yields all the features of a thread. These include: - sequenced api traces @@ -466,7 +462,7 @@ def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle] @abc.abstractmethod def extract_call_features( self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: """ Yields all features of a call. 
These include: - api name @@ -485,7 +481,7 @@ def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> raise NotImplementedError() -def ProcessFilter(extractor: DynamicFeatureExtractor, processes: Set) -> DynamicFeatureExtractor: +def ProcessFilter(extractor: DynamicFeatureExtractor, processes: set) -> DynamicFeatureExtractor: original_get_processes = extractor.get_processes def filtered_get_processes(self): diff --git a/capa/features/extractors/binexport2/__init__.py b/capa/features/extractors/binexport2/__init__.py index d3ce77d22..8032b2fca 100644 --- a/capa/features/extractors/binexport2/__init__.py +++ b/capa/features/extractors/binexport2/__init__.py @@ -17,7 +17,7 @@ import hashlib import logging import contextlib -from typing import Set, Dict, List, Tuple, Iterator +from typing import Iterator from pathlib import Path from collections import defaultdict from dataclasses import dataclass @@ -51,13 +51,13 @@ def compute_common_prefix_length(m: str, n: str) -> int: return len(m) -def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: List[Path]) -> Path: +def get_sample_from_binexport2(input_file: Path, be2: BinExport2, search_paths: list[Path]) -> Path: """attempt to find the sample file, given a BinExport2 file. searches in the same directory as the BinExport2 file, and then in search_paths. """ - def filename_similarity_key(p: Path) -> Tuple[int, str]: + def filename_similarity_key(p: Path) -> tuple[int, str]: # note closure over input_file. 
# sort first by length of common prefix, then by name (for stability) return (compute_common_prefix_length(p.name, input_file.name), p.name) @@ -65,7 +65,7 @@ def filename_similarity_key(p: Path) -> Tuple[int, str]: wanted_sha256: str = be2.meta_information.executable_id.lower() input_directory: Path = input_file.parent - siblings: List[Path] = [p for p in input_directory.iterdir() if p.is_file()] + siblings: list[Path] = [p for p in input_directory.iterdir() if p.is_file()] siblings.sort(key=filename_similarity_key, reverse=True) for sibling in siblings: # e.g. with open IDA files in the same directory on Windows @@ -74,7 +74,7 @@ def filename_similarity_key(p: Path) -> Tuple[int, str]: return sibling for search_path in search_paths: - candidates: List[Path] = [p for p in search_path.iterdir() if p.is_file()] + candidates: list[Path] = [p for p in search_path.iterdir() if p.is_file()] candidates.sort(key=filename_similarity_key, reverse=True) for candidate in candidates: with contextlib.suppress(PermissionError): @@ -88,27 +88,27 @@ class BinExport2Index: def __init__(self, be2: BinExport2): self.be2: BinExport2 = be2 - self.callers_by_vertex_index: Dict[int, List[int]] = defaultdict(list) - self.callees_by_vertex_index: Dict[int, List[int]] = defaultdict(list) + self.callers_by_vertex_index: dict[int, list[int]] = defaultdict(list) + self.callees_by_vertex_index: dict[int, list[int]] = defaultdict(list) # note: flow graph != call graph (vertex) - self.flow_graph_index_by_address: Dict[int, int] = {} - self.flow_graph_address_by_index: Dict[int, int] = {} + self.flow_graph_index_by_address: dict[int, int] = {} + self.flow_graph_address_by_index: dict[int, int] = {} # edges that come from the given basic block - self.source_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list) + self.source_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list) # edges that end up at the given basic block - 
self.target_edges_by_basic_block_index: Dict[int, List[BinExport2.FlowGraph.Edge]] = defaultdict(list) + self.target_edges_by_basic_block_index: dict[int, list[BinExport2.FlowGraph.Edge]] = defaultdict(list) - self.vertex_index_by_address: Dict[int, int] = {} + self.vertex_index_by_address: dict[int, int] = {} - self.data_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list) - self.data_reference_index_by_target_address: Dict[int, List[int]] = defaultdict(list) - self.string_reference_index_by_source_instruction_index: Dict[int, List[int]] = defaultdict(list) + self.data_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list) + self.data_reference_index_by_target_address: dict[int, list[int]] = defaultdict(list) + self.string_reference_index_by_source_instruction_index: dict[int, list[int]] = defaultdict(list) - self.insn_address_by_index: Dict[int, int] = {} - self.insn_index_by_address: Dict[int, int] = {} - self.insn_by_address: Dict[int, BinExport2.Instruction] = {} + self.insn_address_by_index: dict[int, int] = {} + self.insn_index_by_address: dict[int, int] = {} + self.insn_by_address: dict[int, BinExport2.Instruction] = {} # must index instructions first self._index_insn_addresses() @@ -208,7 +208,7 @@ def instruction_indices(basic_block: BinExport2.BasicBlock) -> Iterator[int]: def basic_block_instructions( self, basic_block: BinExport2.BasicBlock - ) -> Iterator[Tuple[int, BinExport2.Instruction, int]]: + ) -> Iterator[tuple[int, BinExport2.Instruction, int]]: """ For a given basic block, enumerate the instruction indices, the instruction instances, and their addresses. 
@@ -253,7 +253,7 @@ def __init__(self, be2: BinExport2, idx: BinExport2Index, buf: bytes): self.idx: BinExport2Index = idx self.buf: bytes = buf self.base_address: int = 0 - self.thunks: Dict[int, int] = {} + self.thunks: dict[int, int] = {} self._find_base_address() self._compute_thunks() @@ -279,7 +279,7 @@ def _compute_thunks(self): curr_idx: int = idx for _ in range(capa.features.common.THUNK_CHAIN_DEPTH_DELTA): - thunk_callees: List[int] = self.idx.callees_by_vertex_index[curr_idx] + thunk_callees: list[int] = self.idx.callees_by_vertex_index[curr_idx] # if this doesn't hold, then it doesn't seem like this is a thunk, # because either, len is: # 0 and the thunk doesn't point to anything, or @@ -324,7 +324,7 @@ class AddressNotMappedError(ReadMemoryError): ... @dataclass class AddressSpace: base_address: int - memory_regions: Tuple[MemoryRegion, ...] + memory_regions: tuple[MemoryRegion, ...] def read_memory(self, address: int, length: int) -> bytes: rva: int = address - self.base_address @@ -337,7 +337,7 @@ def read_memory(self, address: int, length: int) -> bytes: @classmethod def from_pe(cls, pe: PE, base_address: int): - regions: List[MemoryRegion] = [] + regions: list[MemoryRegion] = [] for section in pe.sections: address: int = section.VirtualAddress size: int = section.Misc_VirtualSize @@ -355,7 +355,7 @@ def from_pe(cls, pe: PE, base_address: int): @classmethod def from_elf(cls, elf: ELFFile, base_address: int): - regions: List[MemoryRegion] = [] + regions: list[MemoryRegion] = [] # ELF segments are for runtime data, # ELF sections are for link-time data. 
@@ -401,9 +401,9 @@ class AnalysisContext: class FunctionContext: ctx: AnalysisContext flow_graph_index: int - format: Set[str] - os: Set[str] - arch: Set[str] + format: set[str] + os: set[str] + arch: set[str] @dataclass diff --git a/capa/features/extractors/binexport2/arch/arm/insn.py b/capa/features/extractors/binexport2/arch/arm/insn.py index 7af93aaff..deb6a7d4d 100644 --- a/capa/features/extractors/binexport2/arch/arm/insn.py +++ b/capa/features/extractors/binexport2/arch/arm/insn.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import List, Tuple, Iterator, Optional +from typing import Iterator, Optional import capa.features.extractors.binexport2.helpers from capa.features.insn import MAX_STRUCTURE_SIZE, Number, Offset, OperandNumber, OperandOffset @@ -30,7 +30,7 @@ def extract_insn_number_features( fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -91,7 +91,7 @@ def extract_insn_number_features( def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -120,7 +120,7 @@ def extract_insn_offset_features( def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner be2: BinExport2 = fhi.ctx.be2 @@ -131,7 +131,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed 
to be simple int/reg operands # so we don't have to realize the tree/list. - operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] + operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] if operands[1] != operands[2]: yield Characteristic("nzxor"), ih.address @@ -146,7 +146,7 @@ def extract_insn_nzxor_characteristic_features( def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner be2: BinExport2 = fhi.ctx.be2 diff --git a/capa/features/extractors/binexport2/arch/intel/helpers.py b/capa/features/extractors/binexport2/arch/intel/helpers.py index 3696c0d93..508be3ab4 100644 --- a/capa/features/extractors/binexport2/arch/intel/helpers.py +++ b/capa/features/extractors/binexport2/arch/intel/helpers.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Optional +from typing import Optional from dataclasses import dataclass from capa.features.extractors.binexport2.helpers import get_operand_expressions @@ -32,7 +32,7 @@ def get_operand_phrase_info(be2: BinExport2, operand: BinExport2.Operand) -> Opt # Base: Any general purpose register # Displacement: An integral offset - expressions: List[BinExport2.Expression] = get_operand_expressions(be2, operand) + expressions: list[BinExport2.Expression] = get_operand_expressions(be2, operand) # skip expression up to and including BinExport2.Expression.DEREFERENCE, assume caller # has checked for BinExport2.Expression.DEREFERENCE diff --git a/capa/features/extractors/binexport2/arch/intel/insn.py b/capa/features/extractors/binexport2/arch/intel/insn.py index efb4a6fe5..5f40e8709 100644 --- a/capa/features/extractors/binexport2/arch/intel/insn.py +++ b/capa/features/extractors/binexport2/arch/intel/insn.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import List, Tuple, Iterator +from typing import Iterator import capa.features.extractors.strings import capa.features.extractors.binexport2.helpers @@ -63,7 +63,7 @@ def extract_insn_number_features( fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -123,7 +123,7 @@ def extract_insn_number_features( def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -161,7 +161,7 @@ def is_security_cookie( # security cookie check should use SP or BP op1: BinExport2.Operand = be2.operand[instruction.operand_index[1]] - op1_exprs: List[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index] + op1_exprs: list[BinExport2.Expression] = [be2.expression[expr_i] for expr_i in op1.expression_index] if all(expr.symbol.lower() not in ("bp", "esp", "ebp", "rbp", "rsp") for expr in op1_exprs): return False @@ -192,7 +192,7 @@ def is_security_cookie( def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse non-zeroing XOR instruction from the given instruction. ignore expected non-zeroing XORs, e.g. security cookies. @@ -209,7 +209,7 @@ def extract_insn_nzxor_characteristic_features( instruction: BinExport2.Instruction = be2.instruction[ii.instruction_index] # guaranteed to be simple int/reg operands # so we don't have to realize the tree/list. 
- operands: List[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] + operands: list[BinExport2.Operand] = [be2.operand[operand_index] for operand_index in instruction.operand_index] if operands[0] == operands[1]: return @@ -236,7 +236,7 @@ def extract_insn_nzxor_characteristic_features( def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner be2: BinExport2 = fhi.ctx.be2 diff --git a/capa/features/extractors/binexport2/basicblock.py b/capa/features/extractors/binexport2/basicblock.py index bcb7977b4..15ad0b83b 100644 --- a/capa/features/extractors/binexport2/basicblock.py +++ b/capa/features/extractors/binexport2/basicblock.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator from capa.features.common import Feature, Characteristic from capa.features.address import Address, AbsoluteVirtualAddress @@ -16,20 +16,20 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner bbi: BasicBlockContext = bbh.inner idx = fhi.ctx.idx basic_block_index: int = bbi.basic_block_index - target_edges: List[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index] + target_edges: list[BinExport2.FlowGraph.Edge] = idx.target_edges_by_basic_block_index[basic_block_index] if basic_block_index in (e.source_basic_block_index for e in target_edges): basic_block_address: int = idx.get_basic_block_address(basic_block_index) yield Characteristic("tight loop"), AbsoluteVirtualAddress(basic_block_address) -def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract basic block features""" for bb_handler in BASIC_BLOCK_HANDLERS: for feature, addr in bb_handler(fh, bbh): diff --git a/capa/features/extractors/binexport2/extractor.py b/capa/features/extractors/binexport2/extractor.py index 40d61e694..34c57a896 100644 --- a/capa/features/extractors/binexport2/extractor.py +++ b/capa/features/extractors/binexport2/extractor.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Set, List, Tuple, Iterator +from typing import Iterator import capa.features.extractors.elf import capa.features.extractors.common @@ -48,14 +48,14 @@ def __init__(self, be2: BinExport2, buf: bytes): address_space: AddressSpace = AddressSpace.from_buf(buf, self.analysis.base_address) self.ctx: AnalysisContext = AnalysisContext(self.buf, self.be2, self.idx, self.analysis, address_space) - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(list(capa.features.extractors.common.extract_format(self.buf))) self.global_features.extend(list(capa.features.extractors.common.extract_os(self.buf))) self.global_features.extend(list(capa.features.extractors.common.extract_arch(self.buf))) - self.format: Set[str] = set() - self.os: Set[str] = set() - self.arch: Set[str] = set() + self.format: set[str] = set() + self.os: set[str] = set() + self.arch: set[str] = set() for feature, _ in self.global_features: assert isinstance(feature.value, str) @@ -72,10 +72,10 @@ def __init__(self, be2: BinExport2, buf: bytes): def get_base_address(self) -> AbsoluteVirtualAddress: return AbsoluteVirtualAddress(self.analysis.base_address) - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: yield from self.global_features - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binexport2.file.extract_features(self.be2, self.buf) def get_functions(self) -> Iterator[FunctionHandle]: @@ -97,7 +97,7 @@ def get_functions(self) -> Iterator[FunctionHandle]: inner=FunctionContext(self.ctx, flow_graph_index, self.format, self.os, self.arch), ) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def 
extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binexport2.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -112,7 +112,7 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: inner=BasicBlockContext(basic_block_index), ) - def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binexport2.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: @@ -126,5 +126,5 @@ def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHa def extract_insn_features( self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binexport2.insn.extract_features(fh, bbh, ih) diff --git a/capa/features/extractors/binexport2/file.py b/capa/features/extractors/binexport2/file.py index 9d9872bc2..fa05a1280 100644 --- a/capa/features/extractors/binexport2/file.py +++ b/capa/features/extractors/binexport2/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import io import logging -from typing import Tuple, Iterator +from typing import Iterator import pefile from elftools.elf.elffile import ELFFile @@ -23,7 +23,7 @@ logger = logging.getLogger(__name__) -def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: if buf.startswith(capa.features.extractors.common.MATCH_PE): pe: pefile.PE = pefile.PE(data=buf) yield from capa.features.extractors.pefile.extract_file_export_names(pe) @@ -34,7 +34,7 @@ def extract_file_export_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe logger.warning("unsupported format") -def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: if buf.startswith(capa.features.extractors.common.MATCH_PE): pe: pefile.PE = pefile.PE(data=buf) yield from capa.features.extractors.pefile.extract_file_import_names(pe) @@ -45,7 +45,7 @@ def extract_file_import_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Fe logger.warning("unsupported format") -def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: if buf.startswith(capa.features.extractors.common.MATCH_PE): pe: pefile.PE = pefile.PE(data=buf) yield from capa.features.extractors.pefile.extract_file_section_names(pe) @@ -56,15 +56,15 @@ def extract_file_section_names(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[F logger.warning("unsupported format") -def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_file_strings(buf) -def 
extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_file_format(_be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_format(buf) -def extract_features(be2: BinExport2, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_features(be2: BinExport2, buf: bytes) -> Iterator[tuple[Feature, Address]]: """extract file features""" for file_handler in FILE_HANDLERS: for feature, addr in file_handler(be2, buf): diff --git a/capa/features/extractors/binexport2/function.py b/capa/features/extractors/binexport2/function.py index 0c49036d1..c550b81cc 100644 --- a/capa/features/extractors/binexport2/function.py +++ b/capa/features/extractors/binexport2/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator from capa.features.file import FunctionName from capa.features.common import Feature, Characteristic @@ -16,7 +16,7 @@ from capa.features.extractors.binexport2.binexport2_pb2 import BinExport2 -def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner be2: BinExport2 = fhi.ctx.be2 @@ -32,7 +32,7 @@ def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Feature, Add yield Characteristic("calls to"), AbsoluteVirtualAddress(caller_address) -def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner be2: BinExport2 = fhi.ctx.be2 @@ -40,7 +40,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address flow_graph_index: int = fhi.flow_graph_index flow_graph: BinExport2.FlowGraph = be2.flow_graph[flow_graph_index] - edges: List[Tuple[int, int]] = [] + edges: list[tuple[int, int]] = [] for edge in flow_graph.edge: edges.append((edge.source_basic_block_index, edge.target_basic_block_index)) @@ -48,7 +48,7 @@ def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address yield Characteristic("loop"), fh.address -def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_name(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner be2: BinExport2 = fhi.ctx.be2 @@ -63,7 +63,7 @@ def extract_function_name(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address yield FunctionName(vertex.mangled_name), fh.address -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler 
in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/binexport2/helpers.py b/capa/features/extractors/binexport2/helpers.py index e4e7f7b76..29c40e81d 100644 --- a/capa/features/extractors/binexport2/helpers.py +++ b/capa/features/extractors/binexport2/helpers.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import re -from typing import Set, Dict, List, Tuple, Union, Iterator, Optional +from typing import Union, Iterator, Optional from collections import defaultdict from dataclasses import dataclass @@ -22,7 +22,7 @@ HAS_ARCH_ARM = {ARCH_AARCH64} -def mask_immediate(arch: Set[str], immediate: int) -> int: +def mask_immediate(arch: set[str], immediate: int) -> int: if arch & HAS_ARCH64: immediate &= 0xFFFFFFFFFFFFFFFF elif arch & HAS_ARCH32: @@ -30,7 +30,7 @@ def mask_immediate(arch: Set[str], immediate: int) -> int: return immediate -def twos_complement(arch: Set[str], immediate: int, default: Optional[int] = None) -> int: +def twos_complement(arch: set[str], immediate: int, default: Optional[int] = None) -> int: if default is not None: return capa.features.extractors.helpers.twos_complement(immediate, default) elif arch & HAS_ARCH64: @@ -55,12 +55,12 @@ def is_vertex_type(vertex: BinExport2.CallGraph.Vertex, type_: BinExport2.CallGr def _prune_expression_tree_empty_shifts( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], tree_index: int, ): expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] if expression.type == BinExport2.Expression.OPERATOR: if len(children_tree_indexes) 
== 0 and expression.symbol in ("lsl", "lsr"): @@ -85,12 +85,12 @@ def _prune_expression_tree_empty_shifts( def _prune_expression_tree_empty_commas( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], tree_index: int, ): expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] if expression.type == BinExport2.Expression.OPERATOR: if len(children_tree_indexes) == 1 and expression.symbol == ",": @@ -121,7 +121,7 @@ def _prune_expression_tree_empty_commas( def _prune_expression_tree( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], ): _prune_expression_tree_empty_shifts(be2, operand, expression_tree, 0) _prune_expression_tree_empty_commas(be2, operand, expression_tree, 0) @@ -131,7 +131,7 @@ def _prune_expression_tree( def _build_expression_tree( be2: BinExport2, operand: BinExport2.Operand, -) -> List[List[int]]: +) -> list[list[int]]: # The reconstructed expression tree layout, linking parent nodes to their children. # # There is one list of integers for each expression in the operand. @@ -159,7 +159,7 @@ def _build_expression_tree( # exist (see https://github.com/NationalSecurityAgency/ghidra/issues/6817) return [] - tree: List[List[int]] = [] + tree: list[list[int]] = [] for i, expression_index in enumerate(operand.expression_index): children = [] @@ -181,16 +181,16 @@ def _build_expression_tree( def _fill_operand_expression_list( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], tree_index: int, - expression_list: List[BinExport2.Expression], + expression_list: list[BinExport2.Expression], ): """ Walk the given expression tree and collect the expression nodes in-order. 
""" expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] if expression.type == BinExport2.Expression.REGISTER: assert len(children_tree_indexes) == 0 @@ -282,10 +282,10 @@ def _fill_operand_expression_list( raise NotImplementedError(expression.type) -def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> List[BinExport2.Expression]: +def get_operand_expressions(be2: BinExport2, op: BinExport2.Operand) -> list[BinExport2.Expression]: tree = _build_expression_tree(be2, op) - expressions: List[BinExport2.Expression] = [] + expressions: list[BinExport2.Expression] = [] _fill_operand_expression_list(be2, op, tree, 0, expressions) return expressions @@ -331,11 +331,11 @@ def get_instruction_mnemonic(be2: BinExport2, instruction: BinExport2.Instructio return be2.mnemonic[instruction.mnemonic_index].name.lower() -def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> List[BinExport2.Operand]: +def get_instruction_operands(be2: BinExport2, instruction: BinExport2.Instruction) -> list[BinExport2.Operand]: return [be2.operand[operand_index] for operand_index in instruction.operand_index] -def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]: +def split_with_delimiters(s: str, delimiters: tuple[str, ...]) -> Iterator[str]: """ Splits a string by any of the provided delimiter characters, including the delimiters in the results. @@ -355,7 +355,7 @@ def split_with_delimiters(s: str, delimiters: Tuple[str, ...]) -> Iterator[str]: yield s[start:] -BinExport2OperandPattern = Union[str, Tuple[str, ...]] +BinExport2OperandPattern = Union[str, tuple[str, ...]] @dataclass @@ -382,8 +382,8 @@ class BinExport2InstructionPattern: This matcher uses the BinExport2 data layout under the hood. """ - mnemonics: Tuple[str, ...] 
- operands: Tuple[Union[str, BinExport2OperandPattern], ...] + mnemonics: tuple[str, ...] + operands: tuple[Union[str, BinExport2OperandPattern], ...] capture: Optional[str] @classmethod @@ -438,7 +438,7 @@ def from_str(cls, query: str): mnemonic, _, rest = pattern.partition(" ") mnemonics = mnemonic.split("|") - operands: List[Union[str, Tuple[str, ...]]] = [] + operands: list[Union[str, tuple[str, ...]]] = [] while rest: rest = rest.strip() if not rest.startswith("["): @@ -509,7 +509,7 @@ class MatchResult: expression: BinExport2.Expression def match( - self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]] + self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]] ) -> Optional["BinExport2InstructionPattern.MatchResult"]: """ Match the given BinExport2 data against this pattern. @@ -602,10 +602,10 @@ def match( class BinExport2InstructionPatternMatcher: """Index and match a collection of instruction patterns.""" - def __init__(self, queries: List[BinExport2InstructionPattern]): + def __init__(self, queries: list[BinExport2InstructionPattern]): self.queries = queries # shard the patterns by (mnemonic, #operands) - self._index: Dict[Tuple[str, int], List[BinExport2InstructionPattern]] = defaultdict(list) + self._index: dict[tuple[str, int], list[BinExport2InstructionPattern]] = defaultdict(list) for query in queries: for mnemonic in query.mnemonics: @@ -623,7 +623,7 @@ def from_str(cls, patterns: str): ) def match( - self, mnemonic: str, operand_expressions: List[List[BinExport2.Expression]] + self, mnemonic: str, operand_expressions: list[list[BinExport2.Expression]] ) -> Optional[BinExport2InstructionPattern.MatchResult]: queries = self._index.get((mnemonic.lower(), len(operand_expressions)), []) for query in queries: diff --git a/capa/features/extractors/binexport2/insn.py b/capa/features/extractors/binexport2/insn.py index 8f2e6af99..42abe95d1 100644 --- a/capa/features/extractors/binexport2/insn.py +++ 
b/capa/features/extractors/binexport2/insn.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import List, Tuple, Iterator +from typing import Iterator import capa.features.extractors.helpers import capa.features.extractors.strings @@ -32,7 +32,7 @@ logger = logging.getLogger(__name__) -def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -68,7 +68,7 @@ def extract_insn_api_features(fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner if fhi.arch & HAS_ARCH_INTEL: @@ -77,7 +77,7 @@ def extract_insn_number_features( yield from capa.features.extractors.binexport2.arch.arm.insn.extract_insn_number_features(fh, bbh, ih) -def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -92,7 +92,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl # disassembler already identified string reference from instruction return - reference_addresses: List[int] = [] + reference_addresses: list[int] = [] if instruction_index in idx.data_reference_index_by_source_instruction_index: for data_reference_index in 
idx.data_reference_index_by_source_instruction_index[instruction_index]: @@ -142,7 +142,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_insn_string_features( fh: FunctionHandle, _bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -161,7 +161,7 @@ def extract_insn_string_features( def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner if fhi.arch & HAS_ARCH_INTEL: @@ -172,7 +172,7 @@ def extract_insn_offset_features( def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner if fhi.arch & HAS_ARCH_INTEL: @@ -187,7 +187,7 @@ def extract_insn_nzxor_characteristic_features( def extract_insn_mnemonic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: fhi: FunctionContext = fh.inner ii: InstructionContext = ih.inner @@ -199,7 +199,7 @@ def extract_insn_mnemonic_features( yield Mnemonic(mnemonic_name), ih.address -def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract functions calls from features most relevant at the function scope; @@ -221,7 +221,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: 
fhi: FunctionContext = fh.inner if fhi.arch & HAS_ARCH_INTEL: @@ -234,7 +234,7 @@ def extract_function_indirect_call_characteristic_features( ) -def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: for feature, ea in inst_handler(f, bbh, insn): diff --git a/capa/features/extractors/binja/basicblock.py b/capa/features/extractors/binja/basicblock.py index e74c9f486..5cb8ca138 100644 --- a/capa/features/extractors/binja/basicblock.py +++ b/capa/features/extractors/binja/basicblock.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import string -from typing import Tuple, Iterator +from typing import Iterator from binaryninja import Function from binaryninja import BasicBlock as BinjaBasicBlock @@ -98,22 +98,22 @@ def bb_contains_stackstring(f: Function, bb: MediumLevelILBasicBlock) -> bool: return False -def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract stackstring indicators from basic block""" - bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner + bb: tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner if bb[1] is not None and bb_contains_stackstring(fh.inner, bb[1]): yield Characteristic("stack string"), bbh.address -def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract tight loop indicators from a basic block""" - bb: Tuple[BinjaBasicBlock, MediumLevelILBasicBlock] = bbh.inner + bb: tuple[BinjaBasicBlock, 
MediumLevelILBasicBlock] = bbh.inner for edge in bb[0].outgoing_edges: if edge.target.start == bb[0].start: yield Characteristic("tight loop"), bbh.address -def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract basic block features""" for bb_handler in BASIC_BLOCK_HANDLERS: for feature, addr in bb_handler(fh, bbh): diff --git a/capa/features/extractors/binja/extractor.py b/capa/features/extractors/binja/extractor.py index e542494af..a5bea1596 100644 --- a/capa/features/extractors/binja/extractor.py +++ b/capa/features/extractors/binja/extractor.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator import binaryninja as binja @@ -30,7 +30,7 @@ class BinjaFeatureExtractor(StaticFeatureExtractor): def __init__(self, bv: binja.BinaryView): super().__init__(hashes=SampleHashes.from_bytes(bv.file.raw.read(0, bv.file.raw.length))) self.bv = bv - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.binja.file.extract_file_format(self.bv)) self.global_features.extend(capa.features.extractors.binja.global_.extract_os(self.bv)) self.global_features.extend(capa.features.extractors.binja.global_.extract_arch(self.bv)) @@ -48,7 +48,7 @@ def get_functions(self) -> Iterator[FunctionHandle]: for f in self.bv.functions: yield FunctionHandle(address=AbsoluteVirtualAddress(f.start), inner=f) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binja.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -63,13 +63,13 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: yield BBHandle(address=AbsoluteVirtualAddress(bb.start), inner=(bb, mlil_bb)) - def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.binja.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: import capa.features.extractors.binja.helpers as binja_helpers - bb: Tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner + bb: tuple[binja.BasicBlock, binja.MediumLevelILBasicBlock] = bbh.inner addr = bb[0].start 
for text, length in bb[0]: diff --git a/capa/features/extractors/binja/file.py b/capa/features/extractors/binja/file.py index d5bb5a7c5..b3426212c 100644 --- a/capa/features/extractors/binja/file.py +++ b/capa/features/extractors/binja/file.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Tuple, Iterator +from typing import Iterator from binaryninja import Segment, BinaryView, SymbolType, SymbolBinding @@ -18,7 +18,7 @@ from capa.features.extractors.binja.helpers import read_c_string, unmangle_c_name -def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature, Address]]: +def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[tuple[Feature, Address]]: """check segment for embedded PE""" start = 0 if bv.view_type == "PE" and seg.start == bv.start: @@ -32,13 +32,13 @@ def check_segment_for_pe(bv: BinaryView, seg: Segment) -> Iterator[Tuple[Feature yield Characteristic("embedded pe"), FileOffsetAddress(seg.start + offset) -def extract_file_embedded_pe(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_embedded_pe(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract embedded PE features""" for seg in bv.segments: yield from check_segment_for_pe(bv, seg) -def extract_file_export_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract function exports""" for sym in bv.get_symbols_of_type(SymbolType.FunctionSymbol) + bv.get_symbols_of_type(SymbolType.DataSymbol): if sym.binding in [SymbolBinding.GlobalBinding, SymbolBinding.WeakBinding]: @@ -72,7 +72,7 @@ def extract_file_export_names(bv: BinaryView) -> 
Iterator[Tuple[Feature, Address yield Characteristic("forwarded export"), AbsoluteVirtualAddress(sym.address) -def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract function imports 1. imports by ordinal: @@ -96,19 +96,19 @@ def extract_file_import_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address yield Import(name), addr -def extract_file_section_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract section names""" for name, section in bv.sections.items(): yield Section(name), AbsoluteVirtualAddress(section.start) -def extract_file_strings(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract ASCII and UTF-16 LE strings""" for s in bv.strings: yield String(s.value), FileOffsetAddress(s.start) -def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_function_names(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """ extract the names of statically-linked library functions. 
""" @@ -127,7 +127,7 @@ def extract_file_function_names(bv: BinaryView) -> Iterator[Tuple[Feature, Addre yield FunctionName(name[1:]), sym.address -def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_file_format(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: view_type = bv.view_type if view_type in ["PE", "COFF"]: yield Format(FORMAT_PE), NO_ADDRESS @@ -140,7 +140,7 @@ def extract_file_format(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: raise NotImplementedError(f"unexpected file format: {view_type}") -def extract_features(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_features(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: """extract file features""" for file_handler in FILE_HANDLERS: for feature, addr in file_handler(bv): diff --git a/capa/features/extractors/binja/function.py b/capa/features/extractors/binja/function.py index 520de0b3f..058f98a94 100644 --- a/capa/features/extractors/binja/function.py +++ b/capa/features/extractors/binja/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Tuple, Iterator +from typing import Iterator from binaryninja import Function, BinaryView, SymbolType, RegisterValueType, LowLevelILOperation @@ -95,7 +95,7 @@ def extract_function_name(fh: FunctionHandle): yield FunctionName(name[1:]), sym.address -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/binja/global_.py b/capa/features/extractors/binja/global_.py index 7430d7405..4f774ffe7 100644 --- a/capa/features/extractors/binja/global_.py +++ b/capa/features/extractors/binja/global_.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Tuple, Iterator +from typing import Iterator from binaryninja import BinaryView @@ -16,7 +16,7 @@ logger = logging.getLogger(__name__) -def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_os(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: name = bv.platform.name if "-" in name: name = name.split("-")[0] @@ -45,7 +45,7 @@ def extract_os(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: return -def extract_arch(bv: BinaryView) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(bv: BinaryView) -> Iterator[tuple[Feature, Address]]: arch = bv.arch.name if arch == "x86_64": yield Arch(ARCH_AMD64), NO_ADDRESS diff --git a/capa/features/extractors/binja/helpers.py b/capa/features/extractors/binja/helpers.py index 0ce0f073b..27e8d29e0 100644 --- a/capa/features/extractors/binja/helpers.py +++ b/capa/features/extractors/binja/helpers.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 
KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import re -from typing import List, Callable +from typing import Callable from dataclasses import dataclass from binaryninja import BinaryView, LowLevelILInstruction @@ -17,7 +17,7 @@ class DisassemblyInstruction: address: int length: int - text: List[InstructionTextToken] + text: list[InstructionTextToken] LLIL_VISITOR = Callable[[LowLevelILInstruction, LowLevelILInstruction, int], bool] @@ -54,7 +54,7 @@ def unmangle_c_name(name: str) -> str: def read_c_string(bv: BinaryView, offset: int, max_len: int) -> str: - s: List[str] = [] + s: list[str] = [] while len(s) < max_len: try: c = bv.read(offset + len(s), 1)[0] diff --git a/capa/features/extractors/binja/insn.py b/capa/features/extractors/binja/insn.py index f2b8fefc2..0e8b74ea8 100644 --- a/capa/features/extractors/binja/insn.py +++ b/capa/features/extractors/binja/insn.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Any, List, Tuple, Iterator, Optional +from typing import Any, Iterator, Optional from binaryninja import Function from binaryninja import BasicBlock as BinjaBasicBlock @@ -64,7 +64,7 @@ def is_stub_function(bv: BinaryView, addr: int) -> Optional[int]: return None -def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction API features @@ -123,7 +123,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction number features example: @@ -131,7 +131,7 @@ def extract_insn_number_features( """ func: Function = fh.inner - results: List[Tuple[Any[Number, OperandNumber], Address]] = [] + results: list[tuple[Any[Number, OperandNumber], Address]] = [] def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool: if il.operation == LowLevelILOperation.LLIL_LOAD: @@ -162,7 +162,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index yield from results -def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse referenced byte sequences example: @@ -209,7 +209,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index def extract_insn_string_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction string features @@ -266,7 +266,7 @@ def llil_checker(il: 
LowLevelILInstruction, parent: LowLevelILInstruction, index def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction structure offset features @@ -275,7 +275,7 @@ def extract_insn_offset_features( """ func: Function = fh.inner - results: List[Tuple[Any[Offset, OperandOffset], Address]] = [] + results: list[tuple[Any[Offset, OperandOffset], Address]] = [] address_size = func.view.arch.address_size * 8 def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index: int) -> bool: @@ -353,7 +353,7 @@ def is_nzxor_stack_cookie(f: Function, bb: BinjaBasicBlock, llil: LowLevelILInst def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction non-zeroing XOR instruction ignore expected non-zeroing XORs, e.g. security cookies @@ -382,7 +382,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index def extract_insn_mnemonic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction mnemonic features""" insn: DisassemblyInstruction = ih.inner yield Mnemonic(insn.text[0].text), ih.address @@ -390,7 +390,7 @@ def extract_insn_mnemonic_features( def extract_insn_obfs_call_plus_5_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse call $+5 instruction from the given instruction. 
""" @@ -401,7 +401,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features( def extract_insn_peb_access_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction peb access fs:[0x30] on x86, gs:[0x60] on x64 @@ -444,7 +444,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILOperation, index: def extract_insn_segment_access_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction fs or gs access""" func: Function = fh.inner @@ -471,7 +471,7 @@ def llil_checker(il: LowLevelILInstruction, parent: LowLevelILInstruction, index def extract_insn_cross_section_cflow( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """inspect the instruction for a CALL or JMP that crosses section boundaries""" func: Function = fh.inner bv: BinaryView = func.view @@ -491,7 +491,7 @@ def extract_insn_cross_section_cflow( yield Characteristic("cross section flow"), ih.address -def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract functions calls from features most relevant at the function scope, however, its most efficient to extract at the instruction scope @@ -534,7 +534,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """extract indirect function calls (e.g., call eax or call dword ptr [edx+4]) does not include calls like => call 
ds:dword_ABD4974 @@ -562,7 +562,7 @@ def extract_function_indirect_call_characteristic_features( yield Characteristic("indirect call"), ih.address -def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: for feature, ea in inst_handler(f, bbh, insn): diff --git a/capa/features/extractors/cape/call.py b/capa/features/extractors/cape/call.py index 0bee22fcc..229edf155 100644 --- a/capa/features/extractors/cape/call.py +++ b/capa/features/extractors/cape/call.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Tuple, Iterator +from typing import Iterator import capa.features.extractors.helpers from capa.helpers import assert_never @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) -def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: """ this method extracts the given call's features (such as API name and arguments), and returns them as API, Number, and String features. 
@@ -55,7 +55,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - yield API(name), ch.address -def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: for handler in CALL_HANDLERS: for feature, addr in handler(ph, th, ch): yield feature, addr diff --git a/capa/features/extractors/cape/extractor.py b/capa/features/extractors/cape/extractor.py index 923781aeb..fd042f570 100644 --- a/capa/features/extractors/cape/extractor.py +++ b/capa/features/extractors/cape/extractor.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, Tuple, Union, Iterator +from typing import Union, Iterator import capa.features.extractors.cape.call import capa.features.extractors.cape.file @@ -50,16 +50,16 @@ def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]: assert self.report.static is not None and self.report.static.pe is not None return AbsoluteVirtualAddress(self.report.static.pe.imagebase) - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: yield from self.global_features - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.cape.file.extract_features(self.report) def get_processes(self) -> Iterator[ProcessHandle]: yield from capa.features.extractors.cape.file.get_processes(self.report) - def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.cape.process.extract_features(ph) 
def get_process_name(self, ph) -> str: @@ -69,7 +69,7 @@ def get_process_name(self, ph) -> str: def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: yield from capa.features.extractors.cape.process.get_threads(ph) - def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]: if False: # force this routine to be a generator, # but we don't actually have any elements to generate. @@ -81,7 +81,7 @@ def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle] def extract_call_features( self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.cape.call.extract_features(ph, th, ch) def get_call_name(self, ph, th, ch) -> str: @@ -122,7 +122,7 @@ def get_call_name(self, ph, th, ch) -> str: return "".join(parts) @classmethod - def from_report(cls, report: Dict) -> "CapeExtractor": + def from_report(cls, report: dict) -> "CapeExtractor": cr = CapeReport.model_validate(report) if cr.info.version not in TESTED_VERSIONS: diff --git a/capa/features/extractors/cape/file.py b/capa/features/extractors/cape/file.py index 3143504c0..945b22089 100644 --- a/capa/features/extractors/cape/file.py +++ b/capa/features/extractors/cape/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.file import Export, Import, Section from capa.features.common import String, Feature @@ -41,7 +41,7 @@ def get_processes(report: CapeReport) -> Iterator[ProcessHandle]: seen_processes[addr].append(process) -def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_import_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]: """ extract imported function names """ @@ -62,57 +62,57 @@ def extract_import_names(report: CapeReport) -> Iterator[Tuple[Feature, Address] yield Import(name), AbsoluteVirtualAddress(function.address) -def extract_export_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_export_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]: assert report.static is not None and report.static.pe is not None for function in report.static.pe.exports: yield Export(function.name), AbsoluteVirtualAddress(function.address) -def extract_section_names(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_section_names(report: CapeReport) -> Iterator[tuple[Feature, Address]]: assert report.static is not None and report.static.pe is not None for section in report.static.pe.sections: yield Section(section.name), AbsoluteVirtualAddress(section.virtual_address) -def extract_file_strings(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(report: CapeReport) -> Iterator[tuple[Feature, Address]]: if report.strings is not None: for string in report.strings: yield String(string), NO_ADDRESS -def extract_used_regkeys(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_regkeys(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for regkey in report.behavior.summary.keys: yield String(regkey), NO_ADDRESS -def extract_used_files(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_files(report: 
CapeReport) -> Iterator[tuple[Feature, Address]]: for file in report.behavior.summary.files: yield String(file), NO_ADDRESS -def extract_used_mutexes(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_mutexes(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for mutex in report.behavior.summary.mutexes: yield String(mutex), NO_ADDRESS -def extract_used_commands(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_commands(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for cmd in report.behavior.summary.executed_commands: yield String(cmd), NO_ADDRESS -def extract_used_apis(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_apis(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for symbol in report.behavior.summary.resolved_apis: yield String(symbol), NO_ADDRESS -def extract_used_services(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_used_services(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for svc in report.behavior.summary.created_services: yield String(svc), NO_ADDRESS for svc in report.behavior.summary.started_services: yield String(svc), NO_ADDRESS -def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for handler in FILE_HANDLERS: for feature, addr in handler(report): yield feature, addr diff --git a/capa/features/extractors/cape/global_.py b/capa/features/extractors/cape/global_.py index b73e5ab40..a3b23a7f7 100644 --- a/capa/features/extractors/cape/global_.py +++ b/capa/features/extractors/cape/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import ( OS, @@ -28,7 +28,7 @@ logger = logging.getLogger(__name__) -def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(report: CapeReport) -> Iterator[tuple[Feature, Address]]: if "Intel 80386" in report.target.file.type: yield Arch(ARCH_I386), NO_ADDRESS elif "x86-64" in report.target.file.type: @@ -40,7 +40,7 @@ def extract_arch(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: ) -def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_format(report: CapeReport) -> Iterator[tuple[Feature, Address]]: if "PE" in report.target.file.type: yield Format(FORMAT_PE), NO_ADDRESS elif "ELF" in report.target.file.type: @@ -52,7 +52,7 @@ def extract_format(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: ) -def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_os(report: CapeReport) -> Iterator[tuple[Feature, Address]]: # this variable contains the output of the file command file_output = report.target.file.type @@ -80,7 +80,7 @@ def extract_os(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: yield OS(OS_ANY), NO_ADDRESS -def extract_features(report: CapeReport) -> Iterator[Tuple[Feature, Address]]: +def extract_features(report: CapeReport) -> Iterator[tuple[Feature, Address]]: for global_handler in GLOBAL_HANDLER: for feature, addr in global_handler(report): yield feature, addr diff --git a/capa/features/extractors/cape/helpers.py b/capa/features/extractors/cape/helpers.py index 31dc6c91b..46c584017 100644 --- a/capa/features/extractors/cape/helpers.py +++ b/capa/features/extractors/cape/helpers.py @@ -6,12 +6,12 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Any, Dict, List +from typing import Any from capa.features.extractors.base_extractor import ProcessHandle -def find_process(processes: List[Dict[str, Any]], ph: ProcessHandle) -> Dict[str, Any]: +def find_process(processes: list[dict[str, Any]], ph: ProcessHandle) -> dict[str, Any]: """ find a specific process identified by a process handler. diff --git a/capa/features/extractors/cape/models.py b/capa/features/extractors/cape/models.py index 61e60f716..20bedec24 100644 --- a/capa/features/extractors/cape/models.py +++ b/capa/features/extractors/cape/models.py @@ -6,10 +6,9 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import binascii -from typing import Any, Dict, List, Union, Literal, Optional +from typing import Any, Union, Literal, Optional, Annotated, TypeAlias from pydantic import Field, BaseModel, ConfigDict -from typing_extensions import Annotated, TypeAlias from pydantic.functional_validators import BeforeValidator @@ -59,11 +58,11 @@ class FlexibleModel(BaseModel): # in a field with this type. # then we can update the model with the discovered shape. 
TODO: TypeAlias = None -ListTODO: TypeAlias = List[None] +ListTODO: TypeAlias = list[None] DictTODO: TypeAlias = ExactModel -EmptyDict: TypeAlias = BaseModel -EmptyList: TypeAlias = List[Any] +EmptyDict: TypeAlias = BaseModel +EmptyList: TypeAlias = list[Any] class Info(FlexibleModel): @@ -77,7 +76,7 @@ class ImportedSymbol(ExactModel): class ImportedDll(ExactModel): dll: str - imports: List[ImportedSymbol] + imports: list[ImportedSymbol] class DirectoryEntry(ExactModel): @@ -149,7 +148,7 @@ class Signer(ExactModel): aux_valid: Optional[bool] = None aux_error: Optional[bool] = None aux_error_desc: Optional[str] = None - aux_signers: Optional[List[AuxSigner]] = None + aux_signers: Optional[list[AuxSigner]] = None class Overlay(ExactModel): @@ -178,22 +177,22 @@ class PE(ExactModel): pdbpath: Optional[str] = None timestamp: str - # List[ImportedDll], or Dict[basename(dll), ImportedDll] - imports: Union[List[ImportedDll], Dict[str, ImportedDll]] + # list[ImportedDll], or dict[basename(dll), ImportedDll] + imports: Union[list[ImportedDll], dict[str, ImportedDll]] imported_dll_count: Optional[int] = None imphash: str exported_dll_name: Optional[str] = None - exports: List[ExportedSymbol] + exports: list[ExportedSymbol] - dirents: List[DirectoryEntry] - sections: List[Section] + dirents: list[DirectoryEntry] + sections: list[Section] ep_bytes: Optional[HexBytes] = None overlay: Optional[Overlay] = None - resources: List[Resource] - versioninfo: List[KV] + resources: list[Resource] + versioninfo: list[KV] # base64 encoded data icon: Optional[str] = None @@ -204,7 +203,7 @@ class PE(ExactModel): # short hex string icon_dhash: Optional[str] = None - digital_signers: List[DigitalSigner] + digital_signers: list[DigitalSigner] guest_signers: Signer @@ -217,9 +216,9 @@ class File(FlexibleModel): cape_type: Optional[str] = None pid: Optional[Union[int, Literal[""]]] = None - name: Union[List[str], str] + name: Union[list[str], str] path: str - guest_paths: Union[List[str], str, 
None] + guest_paths: Union[list[str], str, None] timestamp: Optional[str] = None # @@ -244,7 +243,7 @@ class File(FlexibleModel): ep_bytes: Optional[HexBytes] = None entrypoint: Optional[int] = None data: Optional[str] = None - strings: Optional[List[str]] = None + strings: Optional[list[str]] = None # # detections (skip) @@ -283,7 +282,7 @@ class Call(ExactModel): api: str - arguments: List[Argument] + arguments: list[Argument] status: bool return_: HexInt = Field(alias="return") pretty_return: Optional[str] = None @@ -304,9 +303,9 @@ class Process(ExactModel): parent_id: int module_path: str first_seen: str - calls: List[Call] - threads: List[int] - environ: Dict[str, str] + calls: list[Call] + threads: list[int] + environ: dict[str, str] class ProcessTree(ExactModel): @@ -314,25 +313,25 @@ class ProcessTree(ExactModel): pid: int parent_id: int module_path: str - threads: List[int] - environ: Dict[str, str] - children: List["ProcessTree"] + threads: list[int] + environ: dict[str, str] + children: list["ProcessTree"] class Summary(ExactModel): - files: List[str] - read_files: List[str] - write_files: List[str] - delete_files: List[str] - keys: List[str] - read_keys: List[str] - write_keys: List[str] - delete_keys: List[str] - executed_commands: List[str] - resolved_apis: List[str] - mutexes: List[str] - created_services: List[str] - started_services: List[str] + files: list[str] + read_files: list[str] + write_files: list[str] + delete_files: list[str] + keys: list[str] + read_keys: list[str] + write_keys: list[str] + delete_keys: list[str] + executed_commands: list[str] + resolved_apis: list[str] + mutexes: list[str] + created_services: list[str] + started_services: list[str] class EncryptedBuffer(ExactModel): @@ -349,12 +348,12 @@ class Behavior(ExactModel): summary: Summary # list of processes, of threads, of calls - processes: List[Process] + processes: list[Process] # tree of processes - processtree: List[ProcessTree] + processtree: list[ProcessTree] - 
anomaly: List[str] - encryptedbuffers: List[EncryptedBuffer] + anomaly: list[str] + encryptedbuffers: list[EncryptedBuffer] # these are small objects that describe atomic events, # like file move, registry access. # we'll detect the same with our API call analysis. @@ -373,7 +372,7 @@ class Static(ExactModel): class Cape(ExactModel): - payloads: List[ProcessFile] + payloads: list[ProcessFile] configs: Skip = None @@ -389,7 +388,7 @@ class CapeReport(FlexibleModel): # static analysis results # static: Optional[Static] = None - strings: Optional[List[str]] = None + strings: Optional[list[str]] = None # # dynamic analysis results @@ -398,9 +397,9 @@ class CapeReport(FlexibleModel): behavior: Behavior # post-processed results: payloads and extracted configs - CAPE: Optional[Union[Cape, List]] = None - dropped: Optional[List[File]] = None - procdump: Optional[List[ProcessFile]] = None + CAPE: Optional[Union[Cape, list]] = None + dropped: Optional[list[File]] = None + procdump: Optional[list[ProcessFile]] = None procmemory: ListTODO # ========================================================================= @@ -437,7 +436,7 @@ class CapeReport(FlexibleModel): malfamily_tag: Optional[str] = None malscore: float detections: Skip = None - detections2pid: Optional[Dict[int, List[str]]] = None + detections2pid: Optional[dict[int, list[str]]] = None # AV detections for the sample. virustotal: Skip = None diff --git a/capa/features/extractors/cape/process.py b/capa/features/extractors/cape/process.py index 909a9637e..d2bc260c5 100644 --- a/capa/features/extractors/cape/process.py +++ b/capa/features/extractors/cape/process.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import List, Tuple, Iterator +from typing import Iterator from capa.features.common import String, Feature from capa.features.address import Address, ThreadAddress @@ -22,14 +22,14 @@ def get_threads(ph: ProcessHandle) -> Iterator[ThreadHandle]: get the threads associated with a given process """ process: Process = ph.inner - threads: List[int] = process.threads + threads: list[int] = process.threads for thread in threads: address: ThreadAddress = ThreadAddress(process=ph.address, tid=thread) yield ThreadHandle(address=address, inner={}) -def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_environ_strings(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: """ extract strings from a process' provided environment variables. """ @@ -39,7 +39,7 @@ def extract_environ_strings(ph: ProcessHandle) -> Iterator[Tuple[Feature, Addres yield String(value), ph.address -def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: for handler in PROCESS_HANDLERS: for feature, addr in handler(ph): yield feature, addr diff --git a/capa/features/extractors/common.py b/capa/features/extractors/common.py index aa2144c73..e2e95a857 100644 --- a/capa/features/extractors/common.py +++ b/capa/features/extractors/common.py @@ -10,7 +10,7 @@ import logging import binascii import contextlib -from typing import Tuple, Iterator +from typing import Iterator import pefile @@ -45,7 +45,7 @@ MATCH_JSON_OBJECT = b'{"' -def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address]]: +def extract_file_strings(buf: bytes, **kwargs) -> Iterator[tuple[String, Address]]: """ extract ASCII and UTF-16 LE strings from file """ @@ -56,7 +56,7 @@ def extract_file_strings(buf: bytes, **kwargs) -> Iterator[Tuple[String, Address yield String(s.s), FileOffsetAddress(s.offset) -def extract_format(buf: bytes) -> 
Iterator[Tuple[Feature, Address]]: +def extract_format(buf: bytes) -> Iterator[tuple[Feature, Address]]: if buf.startswith(MATCH_PE): yield Format(FORMAT_PE), NO_ADDRESS elif buf.startswith(MATCH_ELF): @@ -79,7 +79,7 @@ def extract_format(buf: bytes) -> Iterator[Tuple[Feature, Address]]: return -def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(buf) -> Iterator[tuple[Feature, Address]]: if buf.startswith(MATCH_PE): yield from capa.features.extractors.pefile.extract_file_arch(pe=pefile.PE(data=buf)) @@ -111,7 +111,7 @@ def extract_arch(buf) -> Iterator[Tuple[Feature, Address]]: return -def extract_os(buf, os=OS_AUTO) -> Iterator[Tuple[Feature, Address]]: +def extract_os(buf, os=OS_AUTO) -> Iterator[tuple[Feature, Address]]: if os != OS_AUTO: yield OS(os), NO_ADDRESS diff --git a/capa/features/extractors/dnfile/extractor.py b/capa/features/extractors/dnfile/extractor.py index fae20db36..8f1efcf4f 100644 --- a/capa/features/extractors/dnfile/extractor.py +++ b/capa/features/extractors/dnfile/extractor.py @@ -8,7 +8,7 @@ from __future__ import annotations -from typing import Dict, List, Tuple, Union, Iterator, Optional +from typing import Union, Iterator, Optional from pathlib import Path import dnfile @@ -41,11 +41,11 @@ class DnFileFeatureExtractorCache: def __init__(self, pe: dnfile.dnPE): - self.imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} - self.native_imports: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} - self.methods: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} - self.fields: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} - self.types: Dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.native_imports: dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.methods: dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.fields: dict[int, Union[DnType, DnUnmanagedMethod]] = {} + self.types: dict[int, Union[DnType, DnUnmanagedMethod]] = {} for 
import_ in get_dotnet_managed_imports(pe): self.imports[import_.token] = import_ @@ -84,7 +84,7 @@ def __init__(self, path: Path): self.token_cache: DnFileFeatureExtractorCache = DnFileFeatureExtractorCache(self.pe) # pre-compute these because we'll yield them at *every* scope. - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_format()) self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_os(pe=self.pe)) self.global_features.extend(capa.features.extractors.dotnetfile.extract_file_arch(pe=self.pe)) @@ -100,7 +100,7 @@ def extract_file_features(self): def get_functions(self) -> Iterator[FunctionHandle]: # create a method lookup table - methods: Dict[Address, FunctionHandle] = {} + methods: dict[Address, FunctionHandle] = {} for token, method in get_dotnet_managed_method_bodies(self.pe): fh: FunctionHandle = FunctionHandle( address=DNTokenAddress(token), @@ -136,7 +136,7 @@ def get_functions(self) -> Iterator[FunctionHandle]: yield from methods.values() - def extract_function_features(self, fh) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.dnfile.function.extract_features(fh) def get_basic_blocks(self, f) -> Iterator[BBHandle]: @@ -157,5 +157,5 @@ def get_instructions(self, fh, bbh): inner=insn, ) - def extract_insn_features(self, fh, bbh, ih) -> Iterator[Tuple[Feature, Address]]: + def extract_insn_features(self, fh, bbh, ih) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.dnfile.insn.extract_features(fh, bbh, ih) diff --git a/capa/features/extractors/dnfile/file.py b/capa/features/extractors/dnfile/file.py index 47f63ca05..b4748b66e 100644 --- a/capa/features/extractors/dnfile/file.py +++ b/capa/features/extractors/dnfile/file.py @@ -8,7 +8,7 @@ from __future__ import 
annotations -from typing import Tuple, Iterator +from typing import Iterator import dnfile @@ -18,35 +18,35 @@ from capa.features.address import Address -def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[Tuple[Import, Address]]: +def extract_file_import_names(pe: dnfile.dnPE) -> Iterator[tuple[Import, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_import_names(pe=pe) -def extract_file_format(pe: dnfile.dnPE) -> Iterator[Tuple[Format, Address]]: +def extract_file_format(pe: dnfile.dnPE) -> Iterator[tuple[Format, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_format(pe=pe) -def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[Tuple[FunctionName, Address]]: +def extract_file_function_names(pe: dnfile.dnPE) -> Iterator[tuple[FunctionName, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_function_names(pe=pe) -def extract_file_strings(pe: dnfile.dnPE) -> Iterator[Tuple[String, Address]]: +def extract_file_strings(pe: dnfile.dnPE) -> Iterator[tuple[String, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_strings(pe=pe) -def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[Tuple[Characteristic, Address]]: +def extract_file_mixed_mode_characteristic_features(pe: dnfile.dnPE) -> Iterator[tuple[Characteristic, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_mixed_mode_characteristic_features(pe=pe) -def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[Tuple[Namespace, Address]]: +def extract_file_namespace_features(pe: dnfile.dnPE) -> Iterator[tuple[Namespace, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_namespace_features(pe=pe) -def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[Tuple[Class, Address]]: +def extract_file_class_features(pe: dnfile.dnPE) -> Iterator[tuple[Class, Address]]: yield from capa.features.extractors.dotnetfile.extract_file_class_features(pe=pe) 
-def extract_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: +def extract_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]: for file_handler in FILE_HANDLERS: for feature, address in file_handler(pe): yield feature, address diff --git a/capa/features/extractors/dnfile/function.py b/capa/features/extractors/dnfile/function.py index ed1bdf8a0..a1fc60d81 100644 --- a/capa/features/extractors/dnfile/function.py +++ b/capa/features/extractors/dnfile/function.py @@ -9,7 +9,7 @@ from __future__ import annotations import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import Feature, Characteristic from capa.features.address import Address @@ -18,30 +18,30 @@ logger = logging.getLogger(__name__) -def extract_function_calls_to(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: +def extract_function_calls_to(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]: """extract callers to a function""" for dest in fh.ctx["calls_to"]: yield Characteristic("calls to"), dest -def extract_function_calls_from(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: +def extract_function_calls_from(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]: """extract callers from a function""" for src in fh.ctx["calls_from"]: yield Characteristic("calls from"), src -def extract_recursive_call(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: +def extract_recursive_call(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]: """extract recursive function call""" if fh.address in fh.ctx["calls_to"]: yield Characteristic("recursive call"), fh.address -def extract_function_loop(fh: FunctionHandle) -> Iterator[Tuple[Characteristic, Address]]: +def extract_function_loop(fh: FunctionHandle) -> Iterator[tuple[Characteristic, Address]]: """extract loop indicators from a function""" raise NotImplementedError() -def extract_features(fh: FunctionHandle) -> 
Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/dnfile/helpers.py b/capa/features/extractors/dnfile/helpers.py index d7f4499ec..deabafa1a 100644 --- a/capa/features/extractors/dnfile/helpers.py +++ b/capa/features/extractors/dnfile/helpers.py @@ -9,7 +9,7 @@ from __future__ import annotations import logging -from typing import Dict, Tuple, Union, Iterator, Optional +from typing import Union, Iterator, Optional import dnfile from dncil.cil.body import CilMethodBody @@ -144,7 +144,7 @@ def get_dotnet_managed_imports(pe: dnfile.dnPE) -> Iterator[DnType]: ) -def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[Tuple[int, str]]: +def get_dotnet_methoddef_property_accessors(pe: dnfile.dnPE) -> Iterator[tuple[int, str]]: """get MethodDef methods used to access properties see https://www.ntcore.com/files/dotnetformat.htm @@ -194,7 +194,7 @@ def get_dotnet_managed_methods(pe: dnfile.dnPE) -> Iterator[DnType]: """ nested_class_table = get_dotnet_nested_class_table_index(pe) - accessor_map: Dict[int, str] = {} + accessor_map: dict[int, str] = {} for methoddef, methoddef_access in get_dotnet_methoddef_property_accessors(pe): accessor_map[methoddef] = methoddef_access @@ -252,7 +252,7 @@ def get_dotnet_fields(pe: dnfile.dnPE) -> Iterator[DnType]: yield DnType(token, typedefname, namespace=typedefnamespace, member=field.row.Name) -def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[Tuple[int, CilMethodBody]]: +def get_dotnet_managed_method_bodies(pe: dnfile.dnPE) -> Iterator[tuple[int, CilMethodBody]]: """get managed methods from MethodDef table""" for rid, method_def in iter_dotnet_table(pe, dnfile.mdtable.MethodDef.number): assert isinstance(method_def, dnfile.mdtable.MethodDefRow) @@ -332,7 +332,7 @@ def get_dotnet_table_row(pe: dnfile.dnPE, 
table_index: int, row_index: int) -> O def resolve_nested_typedef_name( nested_class_table: dict, index: int, typedef: dnfile.mdtable.TypeDefRow, pe: dnfile.dnPE -) -> Tuple[str, Tuple[str, ...]]: +) -> tuple[str, tuple[str, ...]]: """Resolves all nested TypeDef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" if index in nested_class_table: @@ -368,7 +368,7 @@ def resolve_nested_typedef_name( def resolve_nested_typeref_name( index: int, typeref: dnfile.mdtable.TypeRefRow, pe: dnfile.dnPE -) -> Tuple[str, Tuple[str, ...]]: +) -> tuple[str, tuple[str, ...]]: """Resolves all nested TypeRef class names. Returns the namespace as a str and the nested TypeRef name as a tuple""" # If the ResolutionScope decodes to a typeRef type then it is nested if isinstance(typeref.ResolutionScope.table, dnfile.mdtable.TypeRef): @@ -398,7 +398,7 @@ def resolve_nested_typeref_name( return str(typeref.TypeNamespace), (str(typeref.TypeName),) -def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> Dict[int, int]: +def get_dotnet_nested_class_table_index(pe: dnfile.dnPE) -> dict[int, int]: """Build index for EnclosingClass based off the NestedClass row index in the nestedclass table""" nested_class_table = {} @@ -442,7 +442,7 @@ def is_dotnet_mixed_mode(pe: dnfile.dnPE) -> bool: return not bool(pe.net.Flags.CLR_ILONLY) -def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[Tuple[int, dnfile.base.MDTableRow]]: +def iter_dotnet_table(pe: dnfile.dnPE, table_index: int) -> Iterator[tuple[int, dnfile.base.MDTableRow]]: assert pe.net is not None assert pe.net.mdtables is not None diff --git a/capa/features/extractors/dnfile/insn.py b/capa/features/extractors/dnfile/insn.py index e6e9f9406..257af99fa 100644 --- a/capa/features/extractors/dnfile/insn.py +++ b/capa/features/extractors/dnfile/insn.py @@ -9,7 +9,7 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Tuple, Union, Iterator, Optional +from 
typing import TYPE_CHECKING, Union, Iterator, Optional if TYPE_CHECKING: from capa.features.extractors.dnfile.extractor import DnFileFeatureExtractorCache @@ -61,7 +61,7 @@ def get_callee( return callee -def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse instruction API features""" if ih.inner.opcode not in ( OpCodes.Call, @@ -83,7 +83,7 @@ def extract_insn_api_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterato yield API(name), ih.address -def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse instruction property features""" name: Optional[str] = None access: Optional[str] = None @@ -118,7 +118,7 @@ def extract_insn_property_features(fh: FunctionHandle, bh, ih: InsnHandle) -> It def extract_insn_namespace_class_features( fh: FunctionHandle, bh, ih: InsnHandle -) -> Iterator[Tuple[Union[Namespace, Class], Address]]: +) -> Iterator[tuple[Union[Namespace, Class], Address]]: """parse instruction namespace and class features""" type_: Optional[Union[DnType, DnUnmanagedMethod]] = None @@ -173,13 +173,13 @@ def extract_insn_namespace_class_features( yield Namespace(type_.namespace), ih.address -def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_number_features(fh, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse instruction number features""" if ih.inner.is_ldc(): yield Number(ih.inner.get_ldc()), ih.address -def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: 
"""parse instruction string features""" if not ih.inner.is_ldstr(): return @@ -197,7 +197,7 @@ def extract_insn_string_features(fh: FunctionHandle, bh, ih: InsnHandle) -> Iter def extract_unmanaged_call_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Characteristic, Address]]: +) -> Iterator[tuple[Characteristic, Address]]: if ih.inner.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp): return @@ -209,7 +209,7 @@ def extract_unmanaged_call_characteristic_features( yield Characteristic("unmanaged call"), ih.address -def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: for feature, addr in inst_handler(fh, bbh, ih): diff --git a/capa/features/extractors/dnfile/types.py b/capa/features/extractors/dnfile/types.py index 12aac5d61..7b5758ea0 100644 --- a/capa/features/extractors/dnfile/types.py +++ b/capa/features/extractors/dnfile/types.py @@ -6,17 +6,17 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Tuple, Optional +from typing import Optional class DnType: def __init__( - self, token: int, class_: Tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None + self, token: int, class_: tuple[str, ...], namespace: str = "", member: str = "", access: Optional[str] = None ): self.token: int = token self.access: Optional[str] = access self.namespace: str = namespace - self.class_: Tuple[str, ...] = class_ + self.class_: tuple[str, ...] 
= class_ if member == ".ctor": member = "ctor" @@ -44,7 +44,7 @@ def __repr__(self): return str(self) @staticmethod - def format_name(class_: Tuple[str, ...], namespace: str = "", member: str = ""): + def format_name(class_: tuple[str, ...], namespace: str = "", member: str = ""): if len(class_) > 1: class_str = "/".join(class_) # Concat items in tuple, separated by a "/" else: diff --git a/capa/features/extractors/dotnetfile.py b/capa/features/extractors/dotnetfile.py index 5ab998579..bfdfcfa4a 100644 --- a/capa/features/extractors/dotnetfile.py +++ b/capa/features/extractors/dotnetfile.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Tuple, Iterator +from typing import Iterator from pathlib import Path import dnfile @@ -48,12 +48,12 @@ logger = logging.getLogger(__name__) -def extract_file_format(**kwargs) -> Iterator[Tuple[Format, Address]]: +def extract_file_format(**kwargs) -> Iterator[tuple[Format, Address]]: yield Format(FORMAT_DOTNET), NO_ADDRESS yield Format(FORMAT_PE), NO_ADDRESS -def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Import, Address]]: +def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Import, Address]]: for method in get_dotnet_managed_imports(pe): # like System.IO.File::OpenRead yield Import(str(method)), DNTokenAddress(method.token) @@ -64,12 +64,12 @@ def extract_file_import_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Impor yield Import(name), DNTokenAddress(imp.token) -def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[FunctionName, Address]]: +def extract_file_function_names(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[FunctionName, Address]]: for method in get_dotnet_managed_methods(pe): yield FunctionName(str(method)), 
DNTokenAddress(method.token) -def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Namespace, Address]]: +def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Namespace, Address]]: """emit namespace features from TypeRef and TypeDef tables""" # namespaces may be referenced multiple times, so we need to filter @@ -93,7 +93,7 @@ def extract_file_namespace_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple yield Namespace(namespace), NO_ADDRESS -def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Class, Address]]: +def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Class, Address]]: """emit class features from TypeRef and TypeDef tables""" nested_class_table = get_dotnet_nested_class_table_index(pe) @@ -116,11 +116,11 @@ def extract_file_class_features(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Cla yield Class(DnType.format_name(typerefname, namespace=typerefnamespace)), DNTokenAddress(token) -def extract_file_os(**kwargs) -> Iterator[Tuple[OS, Address]]: +def extract_file_os(**kwargs) -> Iterator[tuple[OS, Address]]: yield OS(OS_ANY), NO_ADDRESS -def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address]]: +def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[Arch, Address]]: # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020 # .NET 4.5 added option: any CPU, 32-bit preferred assert pe.net is not None @@ -134,18 +134,18 @@ def extract_file_arch(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[Arch, Address yield Arch(ARCH_ANY), NO_ADDRESS -def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[Tuple[String, Address]]: +def extract_file_strings(pe: dnfile.dnPE, **kwargs) -> Iterator[tuple[String, Address]]: yield from capa.features.extractors.common.extract_file_strings(pe.__data__) def extract_file_mixed_mode_characteristic_features( pe: dnfile.dnPE, **kwargs -) -> 
Iterator[Tuple[Characteristic, Address]]: +) -> Iterator[tuple[Characteristic, Address]]: if is_dotnet_mixed_mode(pe): yield Characteristic("mixed mode"), NO_ADDRESS -def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: +def extract_file_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]: for file_handler in FILE_HANDLERS: for feature, addr in file_handler(pe=pe): # type: ignore yield feature, addr @@ -162,7 +162,7 @@ def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: ) -def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, Address]]: +def extract_global_features(pe: dnfile.dnPE) -> Iterator[tuple[Feature, Address]]: for handler in GLOBAL_HANDLERS: for feature, va in handler(pe=pe): # type: ignore yield feature, va @@ -204,7 +204,7 @@ def is_dotnet_file(self) -> bool: def is_mixed_mode(self) -> bool: return is_dotnet_mixed_mode(self.pe) - def get_runtime_version(self) -> Tuple[int, int]: + def get_runtime_version(self) -> tuple[int, int]: assert self.pe.net is not None assert self.pe.net.struct is not None assert self.pe.net.struct.MajorRuntimeVersion is not None diff --git a/capa/features/extractors/drakvuf/call.py b/capa/features/extractors/drakvuf/call.py index 7d0e2a5ee..c6af7035c 100644 --- a/capa/features/extractors/drakvuf/call.py +++ b/capa/features/extractors/drakvuf/call.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator import capa.features.extractors.helpers from capa.features.insn import API, Number @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: """ This method extracts the given call's features (such as API name and arguments), and returns them as API, Number, and String features. @@ -49,7 +49,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - yield API(name), ch.address -def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: for handler in CALL_HANDLERS: for feature, addr in handler(ph, th, ch): yield feature, addr diff --git a/capa/features/extractors/drakvuf/extractor.py b/capa/features/extractors/drakvuf/extractor.py index 1a4f5062e..e7fb69eff 100644 --- a/capa/features/extractors/drakvuf/extractor.py +++ b/capa/features/extractors/drakvuf/extractor.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Dict, List, Tuple, Union, Iterator +from typing import Union, Iterator import capa.features.extractors.drakvuf.call import capa.features.extractors.drakvuf.file @@ -39,7 +39,7 @@ def __init__(self, report: DrakvufReport): self.report: DrakvufReport = report # sort the api calls to prevent going through the entire list each time - self.sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = index_calls(report) + self.sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = index_calls(report) # pre-compute these because we'll yield them at *every* scope. self.global_features = list(capa.features.extractors.drakvuf.global_.extract_features(self.report)) @@ -48,16 +48,16 @@ def get_base_address(self) -> Union[AbsoluteVirtualAddress, _NoAddress, None]: # DRAKVUF currently does not yield information about the PE's address return NO_ADDRESS - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: yield from self.global_features - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.drakvuf.file.extract_features(self.report) def get_processes(self) -> Iterator[ProcessHandle]: yield from capa.features.extractors.drakvuf.file.get_processes(self.sorted_calls) - def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_process_features(self, ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.drakvuf.process.extract_features(ph) def get_process_name(self, ph: ProcessHandle) -> str: @@ -66,7 +66,7 @@ def get_process_name(self, ph: ProcessHandle) -> str: def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: yield from capa.features.extractors.drakvuf.process.get_threads(self.sorted_calls, ph) - def 
extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]: if False: # force this routine to be a generator, # but we don't actually have any elements to generate. @@ -87,10 +87,10 @@ def get_call_name(self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> def extract_call_features( self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.drakvuf.call.extract_features(ph, th, ch) @classmethod - def from_report(cls, report: Iterator[Dict]) -> "DrakvufExtractor": + def from_report(cls, report: Iterator[dict]) -> "DrakvufExtractor": dr = DrakvufReport.from_raw_report(report) return DrakvufExtractor(report=dr) diff --git a/capa/features/extractors/drakvuf/file.py b/capa/features/extractors/drakvuf/file.py index d93c354b2..f0d310ba9 100644 --- a/capa/features/extractors/drakvuf/file.py +++ b/capa/features/extractors/drakvuf/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, List, Tuple, Iterator +from typing import Iterator from capa.features.file import Import from capa.features.common import Feature @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) -> Iterator[ProcessHandle]: +def get_processes(calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]]) -> Iterator[ProcessHandle]: """ Get all the created processes for a sample. 
""" @@ -28,7 +28,7 @@ def get_processes(calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]) yield ProcessHandle(proc_addr, inner={"process_name": sample_call.process_name}) -def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_import_names(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: """ Extract imported function names. """ @@ -43,7 +43,7 @@ def extract_import_names(report: DrakvufReport) -> Iterator[Tuple[Feature, Addre yield Import(name), AbsoluteVirtualAddress(function_address) -def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: for handler in FILE_HANDLERS: for feature, addr in handler(report): yield feature, addr diff --git a/capa/features/extractors/drakvuf/global_.py b/capa/features/extractors/drakvuf/global_.py index 00d18afc7..0475583b6 100644 --- a/capa/features/extractors/drakvuf/global_.py +++ b/capa/features/extractors/drakvuf/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import OS, FORMAT_PE, ARCH_AMD64, OS_WINDOWS, Arch, Format, Feature from capa.features.address import NO_ADDRESS, Address @@ -16,22 +16,22 @@ logger = logging.getLogger(__name__) -def extract_format(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_format(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: # DRAKVUF sandbox currently supports only Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield Format(FORMAT_PE), NO_ADDRESS -def extract_os(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_os(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: # DRAKVUF sandbox currently supports only PE files: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield OS(OS_WINDOWS), NO_ADDRESS -def extract_arch(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: # DRAKVUF sandbox currently supports only x64 Windows as the guest: https://drakvuf-sandbox.readthedocs.io/en/latest/usage/getting_started.html yield Arch(ARCH_AMD64), NO_ADDRESS -def extract_features(report: DrakvufReport) -> Iterator[Tuple[Feature, Address]]: +def extract_features(report: DrakvufReport) -> Iterator[tuple[Feature, Address]]: for global_handler in GLOBAL_HANDLER: for feature, addr in global_handler(report): yield feature, addr diff --git a/capa/features/extractors/drakvuf/helpers.py b/capa/features/extractors/drakvuf/helpers.py index 59708f5df..a47c62be7 100644 --- a/capa/features/extractors/drakvuf/helpers.py +++ b/capa/features/extractors/drakvuf/helpers.py @@ -7,16 +7,15 @@ # See the License for the specific language governing permissions and limitations under the License. 
import itertools -from typing import Dict, List from capa.features.address import ThreadAddress, ProcessAddress from capa.features.extractors.drakvuf.models import Call, DrakvufReport -def index_calls(report: DrakvufReport) -> Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]]: +def index_calls(report: DrakvufReport) -> dict[ProcessAddress, dict[ThreadAddress, list[Call]]]: # this method organizes calls into processes and threads, and then sorts them based on # timestamp so that we can address individual calls per index (CallAddress requires call index) - result: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]] = {} + result: dict[ProcessAddress, dict[ThreadAddress, list[Call]]] = {} for call in itertools.chain(report.syscalls, report.apicalls): if call.pid == 0: # DRAKVUF captures api/native calls from all processes running on the system. diff --git a/capa/features/extractors/drakvuf/models.py b/capa/features/extractors/drakvuf/models.py index fbfd649c7..0af4b11e7 100644 --- a/capa/features/extractors/drakvuf/models.py +++ b/capa/features/extractors/drakvuf/models.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Any, Dict, List, Iterator +from typing import Any, Iterator from pydantic import Field, BaseModel, ConfigDict, model_validator @@ -47,7 +47,7 @@ class LoadedDLL(ConciseModel): plugin_name: str = Field(alias="Plugin") event: str = Field(alias="Event") name: str = Field(alias="DllName") - imports: Dict[str, int] = Field(alias="Rva") + imports: dict[str, int] = Field(alias="Rva") class Call(ConciseModel): @@ -58,18 +58,18 @@ class Call(ConciseModel): pid: int = Field(alias="PID") tid: int = Field(alias="TID") name: str = Field(alias="Method") - arguments: Dict[str, str] + arguments: dict[str, str] class WinApiCall(Call): # This class models Windows API calls captured by DRAKVUF (DLLs, etc.). - arguments: Dict[str, str] = Field(alias="Arguments") + arguments: dict[str, str] = Field(alias="Arguments") event: str = Field(alias="Event") return_value: str = Field(alias="ReturnValue") @model_validator(mode="before") @classmethod - def build_arguments(cls, values: Dict[str, Any]) -> Dict[str, Any]: + def build_arguments(cls, values: dict[str, Any]) -> dict[str, Any]: args = values["Arguments"] values["Arguments"] = dict(arg.split("=", 1) for arg in args) return values @@ -100,7 +100,7 @@ class SystemCall(Call): @model_validator(mode="before") @classmethod - def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: + def build_extra(cls, values: dict[str, Any]) -> dict[str, Any]: # DRAKVUF stores argument names and values as entries in the syscall's entry. # This model validator collects those arguments into a list in the model. 
values["arguments"] = { @@ -110,13 +110,13 @@ def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: class DrakvufReport(ConciseModel): - syscalls: List[SystemCall] = [] - apicalls: List[WinApiCall] = [] - discovered_dlls: List[DiscoveredDLL] = [] - loaded_dlls: List[LoadedDLL] = [] + syscalls: list[SystemCall] = [] + apicalls: list[WinApiCall] = [] + discovered_dlls: list[DiscoveredDLL] = [] + loaded_dlls: list[LoadedDLL] = [] @classmethod - def from_raw_report(cls, entries: Iterator[Dict]) -> "DrakvufReport": + def from_raw_report(cls, entries: Iterator[dict]) -> "DrakvufReport": report = cls() for entry in entries: diff --git a/capa/features/extractors/drakvuf/process.py b/capa/features/extractors/drakvuf/process.py index 8b0819264..292e8af3c 100644 --- a/capa/features/extractors/drakvuf/process.py +++ b/capa/features/extractors/drakvuf/process.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, List, Tuple, Iterator +from typing import Iterator from capa.features.common import String, Feature from capa.features.address import Address, ThreadAddress, ProcessAddress @@ -18,7 +18,7 @@ def get_threads( - calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle + calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle ) -> Iterator[ThreadHandle]: """ Get the threads associated with a given process. 
@@ -27,11 +27,11 @@ def get_threads( yield ThreadHandle(address=thread_addr, inner={}) -def extract_process_name(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_process_name(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: yield String(ph.inner["process_name"]), ph.address -def extract_features(ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle) -> Iterator[tuple[Feature, Address]]: for handler in PROCESS_HANDLERS: for feature, addr in handler(ph): yield feature, addr diff --git a/capa/features/extractors/drakvuf/thread.py b/capa/features/extractors/drakvuf/thread.py index 5e72b51ab..830098579 100644 --- a/capa/features/extractors/drakvuf/thread.py +++ b/capa/features/extractors/drakvuf/thread.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, List, Iterator +from typing import Iterator from capa.features.address import ThreadAddress, ProcessAddress, DynamicCallAddress from capa.features.extractors.base_extractor import CallHandle, ThreadHandle, ProcessHandle @@ -17,7 +17,7 @@ def get_calls( - sorted_calls: Dict[ProcessAddress, Dict[ThreadAddress, List[Call]]], ph: ProcessHandle, th: ThreadHandle + sorted_calls: dict[ProcessAddress, dict[ThreadAddress, list[Call]]], ph: ProcessHandle, th: ThreadHandle ) -> Iterator[CallHandle]: for i, call in enumerate(sorted_calls[ph.address][th.address]): call_addr = DynamicCallAddress(thread=th.address, id=i) diff --git a/capa/features/extractors/elf.py b/capa/features/extractors/elf.py index 82c8c3da9..8ce0c7b4a 100644 --- a/capa/features/extractors/elf.py +++ b/capa/features/extractors/elf.py @@ -10,7 +10,7 @@ import itertools import collections from enum import Enum -from typing import TYPE_CHECKING, Set, Dict, List, Tuple, BinaryIO, Iterator, Optional +from typing import TYPE_CHECKING, BinaryIO, Iterator, Optional from dataclasses import 
dataclass if TYPE_CHECKING: @@ -394,7 +394,7 @@ def linker(self): return read_cstr(phdr.buf, 0) @property - def versions_needed(self) -> Dict[str, Set[str]]: + def versions_needed(self) -> dict[str, set[str]]: # symbol version requirements are stored in the .gnu.version_r section, # which has type SHT_GNU_verneed (0x6ffffffe). # @@ -452,7 +452,7 @@ def versions_needed(self) -> Dict[str, Set[str]]: return {} @property - def dynamic_entries(self) -> Iterator[Tuple[int, int]]: + def dynamic_entries(self) -> Iterator[tuple[int, int]]: """ read the entries from the dynamic section, yielding the tag and value for each entry. @@ -547,7 +547,7 @@ def needed(self) -> Iterator[str]: logger.warning("failed to read DT_NEEDED entry: %s", str(e)) @property - def symtab(self) -> Optional[Tuple[Shdr, Shdr]]: + def symtab(self) -> Optional[tuple[Shdr, Shdr]]: """ fetch the Shdr for the symtab and the associated strtab. """ @@ -682,7 +682,7 @@ def __init__( symtab: Shdr, strtab: Shdr, ) -> None: - self.symbols: List[Symbol] = [] + self.symbols: list[Symbol] = [] self.symtab = symtab self.strtab = strtab diff --git a/capa/features/extractors/elffile.py b/capa/features/extractors/elffile.py index 630f8024e..b63039a19 100644 --- a/capa/features/extractors/elffile.py +++ b/capa/features/extractors/elffile.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import io import logging -from typing import Tuple, Iterator +from typing import Iterator from pathlib import Path from elftools.elf.elffile import ELFFile, DynamicSegment, SymbolTableSection @@ -166,7 +166,7 @@ def extract_file_arch(elf: ELFFile, **kwargs): logger.warning("unsupported architecture: %s", arch) -def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]: +def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]: for file_handler in FILE_HANDLERS: for feature, addr in file_handler(elf=elf, buf=buf): # type: ignore yield feature, addr @@ -182,7 +182,7 @@ def extract_file_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, i ) -def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[Tuple[Feature, int]]: +def extract_global_features(elf: ELFFile, buf: bytes) -> Iterator[tuple[Feature, int]]: for global_handler in GLOBAL_HANDLERS: for feature, addr in global_handler(elf=elf, buf=buf): # type: ignore yield feature, addr diff --git a/capa/features/extractors/ghidra/basicblock.py b/capa/features/extractors/ghidra/basicblock.py index b3271586e..a89586a60 100644 --- a/capa/features/extractors/ghidra/basicblock.py +++ b/capa/features/extractors/ghidra/basicblock.py @@ -8,7 +8,7 @@ import string import struct -from typing import Tuple, Iterator +from typing import Iterator import ghidra from ghidra.program.model.lang import OperandType @@ -97,7 +97,7 @@ def _bb_has_tight_loop(bb: ghidra.program.model.block.CodeBlock): return False -def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract stackstring indicators from basic block""" bb: ghidra.program.model.block.CodeBlock = bbh.inner @@ -105,7 +105,7 @@ def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[ yield Characteristic("stack string"), bbh.address -def 
extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """check basic block for tight loop indicators""" bb: ghidra.program.model.block.CodeBlock = bbh.inner @@ -119,7 +119,7 @@ def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[F ) -def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """ extract features from the given basic block. @@ -127,7 +127,7 @@ def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Featur bb: the basic block to process. yields: - Tuple[Feature, int]: the features and their location found in this basic block. + tuple[Feature, int]: the features and their location found in this basic block. """ yield BasicBlock(), bbh.address for bb_handler in BASIC_BLOCK_HANDLERS: diff --git a/capa/features/extractors/ghidra/extractor.py b/capa/features/extractors/ghidra/extractor.py index c7ed637bb..c694e0865 100644 --- a/capa/features/extractors/ghidra/extractor.py +++ b/capa/features/extractors/ghidra/extractor.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator import capa.features.extractors.ghidra.file import capa.features.extractors.ghidra.insn @@ -40,7 +40,7 @@ def __init__(self): ) ) - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.ghidra.file.extract_file_format()) self.global_features.extend(capa.features.extractors.ghidra.global_.extract_os()) self.global_features.extend(capa.features.extractors.ghidra.global_.extract_arch()) @@ -73,7 +73,7 @@ def get_function(addr: int) -> FunctionHandle: func = getFunctionContaining(toAddr(addr)) # type: ignore [name-defined] # noqa: F821 return FunctionHandle(address=AbsoluteVirtualAddress(func.getEntryPoint().getOffset()), inner=func) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.ghidra.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -81,7 +81,7 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: yield from ghidra_helpers.get_function_blocks(fh) - def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.ghidra.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: diff --git a/capa/features/extractors/ghidra/file.py b/capa/features/extractors/ghidra/file.py index 0e7407b28..2e18cecdc 100644 --- a/capa/features/extractors/ghidra/file.py +++ b/capa/features/extractors/ghidra/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and 
limitations under the License. import re import struct -from typing import List, Tuple, Iterator +from typing import Iterator from ghidra.program.model.symbol import SourceType, SymbolType @@ -22,7 +22,7 @@ MAX_OFFSET_PE_AFTER_MZ = 0x200 -def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) -> Iterator[Tuple[int, int]]: +def find_embedded_pe(block_bytez: bytes, mz_xor: list[tuple[bytes, bytes, int]]) -> Iterator[tuple[int, int]]: """check segment for embedded PE adapted for Ghidra from: @@ -60,11 +60,11 @@ def find_embedded_pe(block_bytez: bytes, mz_xor: List[Tuple[bytes, bytes, int]]) yield off, i -def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: +def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]: """extract embedded PE features""" # pre-compute XOR pairs - mz_xor: List[Tuple[bytes, bytes, int]] = [ + mz_xor: list[tuple[bytes, bytes, int]] = [ ( capa.features.extractors.helpers.xor_static(b"MZ", i), capa.features.extractors.helpers.xor_static(b"PE", i), @@ -84,14 +84,14 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: yield Characteristic("embedded pe"), FileOffsetAddress(ea) -def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names() -> Iterator[tuple[Feature, Address]]: """extract function exports""" st = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821 for addr in st.getExternalEntryPointIterator(): yield Export(st.getPrimarySymbol(addr).getName()), AbsoluteVirtualAddress(addr.getOffset()) -def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names() -> Iterator[tuple[Feature, Address]]: """extract function imports 1. 
imports by ordinal: @@ -116,14 +116,14 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: yield Import(name), AbsoluteVirtualAddress(addr) -def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names() -> Iterator[tuple[Feature, Address]]: """extract section names""" for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821 yield Section(block.getName()), AbsoluteVirtualAddress(block.getStart().getOffset()) -def extract_file_strings() -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings() -> Iterator[tuple[Feature, Address]]: """extract ASCII and UTF-16 LE strings""" for block in currentProgram().getMemory().getBlocks(): # type: ignore [name-defined] # noqa: F821 @@ -141,7 +141,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]: yield String(s.s), FileOffsetAddress(offset) -def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_function_names() -> Iterator[tuple[Feature, Address]]: """ extract the names of statically-linked library functions. 
""" @@ -162,7 +162,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: yield FunctionName(name[1:]), addr -def extract_file_format() -> Iterator[Tuple[Feature, Address]]: +def extract_file_format() -> Iterator[tuple[Feature, Address]]: ef = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821 if "PE" in ef: yield Format(FORMAT_PE), NO_ADDRESS @@ -175,7 +175,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]: raise NotImplementedError(f"unexpected file format: {ef}") -def extract_features() -> Iterator[Tuple[Feature, Address]]: +def extract_features() -> Iterator[tuple[Feature, Address]]: """extract file features""" for file_handler in FILE_HANDLERS: for feature, addr in file_handler(): diff --git a/capa/features/extractors/ghidra/function.py b/capa/features/extractors/ghidra/function.py index d31ba86a6..59f0c7212 100644 --- a/capa/features/extractors/ghidra/function.py +++ b/capa/features/extractors/ghidra/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Tuple, Iterator +from typing import Iterator import ghidra from ghidra.program.model.block import BasicBlockModel, SimpleBlockIterator @@ -49,7 +49,7 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), AbsoluteVirtualAddress(f.getEntryPoint().getOffset()) -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/ghidra/global_.py b/capa/features/extractors/ghidra/global_.py index 0df58a084..65e8d6a21 100644 --- a/capa/features/extractors/ghidra/global_.py +++ b/capa/features/extractors/ghidra/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging import contextlib -from typing import Tuple, Iterator +from typing import Iterator import capa.ghidra.helpers import capa.features.extractors.elf @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) -def extract_os() -> Iterator[Tuple[Feature, Address]]: +def extract_os() -> Iterator[tuple[Feature, Address]]: format_name: str = currentProgram().getExecutableFormat() # type: ignore [name-defined] # noqa: F821 if "PE" in format_name: @@ -45,7 +45,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]: return -def extract_arch() -> Iterator[Tuple[Feature, Address]]: +def extract_arch() -> Iterator[tuple[Feature, Address]]: lang_id = currentProgram().getMetadata().get("Language ID") # type: ignore [name-defined] # noqa: F821 if "x86" in lang_id and "64" in lang_id: diff --git a/capa/features/extractors/ghidra/helpers.py b/capa/features/extractors/ghidra/helpers.py index 22e0ed6d4..7f4a3790e 100644 --- a/capa/features/extractors/ghidra/helpers.py +++ b/capa/features/extractors/ghidra/helpers.py @@ -5,7 +5,7 @@ # Unless required by applicable 
law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Dict, List, Iterator +from typing import Iterator import ghidra import java.lang @@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import BBHandle, InsnHandle, FunctionHandle -def ints_to_bytes(bytez: List[int]) -> bytes: +def ints_to_bytes(bytez: list[int]) -> bytes: """convert Java signed ints to Python bytes args: @@ -83,10 +83,10 @@ def get_insn_in_range(bbh: BBHandle) -> Iterator[InsnHandle]: yield InsnHandle(address=AbsoluteVirtualAddress(insn.getAddress().getOffset()), inner=insn) -def get_file_imports() -> Dict[int, List[str]]: +def get_file_imports() -> dict[int, list[str]]: """get all import names & addrs""" - import_dict: Dict[int, List[str]] = {} + import_dict: dict[int, list[str]] = {} for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821 for r in f.getSymbol().getReferences(): @@ -110,7 +110,7 @@ def get_file_imports() -> Dict[int, List[str]]: return import_dict -def get_file_externs() -> Dict[int, List[str]]: +def get_file_externs() -> dict[int, list[str]]: """ Gets function names & addresses of statically-linked library functions @@ -124,7 +124,7 @@ def get_file_externs() -> Dict[int, List[str]]: - Note: See Symbol Table labels """ - extern_dict: Dict[int, List[str]] = {} + extern_dict: dict[int, list[str]] = {} for sym in currentProgram().getSymbolTable().getAllSymbols(True): # type: ignore [name-defined] # noqa: F821 # .isExternal() misses more than this config for the function symbols @@ -143,7 +143,7 @@ def get_file_externs() -> Dict[int, List[str]]: return extern_dict -def map_fake_import_addrs() -> Dict[int, List[int]]: +def map_fake_import_addrs() -> dict[int, list[int]]: """ 
Map ghidra's fake import entrypoints to their real addresses @@ -162,7 +162,7 @@ def map_fake_import_addrs() -> Dict[int, List[int]]: - 0x473090 -> PTR_CreateServiceW_00473090 - 'EXTERNAL:00000025' -> External Address (ghidra.program.model.address.SpecialAddress) """ - fake_dict: Dict[int, List[int]] = {} + fake_dict: dict[int, list[int]] = {} for f in currentProgram().getFunctionManager().getExternalFunctions(): # type: ignore [name-defined] # noqa: F821 for r in f.getSymbol().getReferences(): @@ -174,9 +174,9 @@ def map_fake_import_addrs() -> Dict[int, List[int]]: def check_addr_for_api( addr: ghidra.program.model.address.Address, - fakes: Dict[int, List[int]], - imports: Dict[int, List[str]], - externs: Dict[int, List[str]], + fakes: dict[int, list[int]], + imports: dict[int, list[str]], + externs: dict[int, list[str]], ) -> bool: offset = addr.getOffset() diff --git a/capa/features/extractors/ghidra/insn.py b/capa/features/extractors/ghidra/insn.py index c9f2dada3..cd8d65d02 100644 --- a/capa/features/extractors/ghidra/insn.py +++ b/capa/features/extractors/ghidra/insn.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Any, Dict, Tuple, Iterator +from typing import Any, Iterator import ghidra from ghidra.program.model.lang import OperandType @@ -26,21 +26,21 @@ OPERAND_TYPE_DYNAMIC_ADDRESS = OperandType.DYNAMIC | OperandType.ADDRESS -def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_imports(ctx: dict[str, Any]) -> dict[int, Any]: """Populate the import cache for this context""" if "imports_cache" not in ctx: ctx["imports_cache"] = capa.features.extractors.ghidra.helpers.get_file_imports() return ctx["imports_cache"] -def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_externs(ctx: dict[str, Any]) -> dict[int, Any]: """Populate the externs cache for this context""" if "externs_cache" not in ctx: ctx["externs_cache"] = capa.features.extractors.ghidra.helpers.get_file_externs() return ctx["externs_cache"] -def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_fakes(ctx: dict[str, Any]) -> dict[int, Any]: """Populate the fake import addrs cache for this context""" if "fakes_cache" not in ctx: ctx["fakes_cache"] = capa.features.extractors.ghidra.helpers.map_fake_import_addrs() @@ -48,7 +48,7 @@ def get_fakes(ctx: Dict[str, Any]) -> Dict[int, Any]: def check_for_api_call( - insn, externs: Dict[int, Any], fakes: Dict[int, Any], imports: Dict[int, Any], imp_or_ex: bool + insn, externs: dict[int, Any], fakes: dict[int, Any], imports: dict[int, Any], imp_or_ex: bool ) -> Iterator[Any]: """check instruction for API call @@ -110,7 +110,7 @@ def check_for_api_call( yield info -def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: insn: ghidra.program.database.code.InstructionDB = ih.inner if not capa.features.extractors.ghidra.helpers.is_call_or_jmp(insn): @@ -131,7 +131,7 @@ def extract_insn_api_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) 
yield API(ext), ih.address -def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction number features example: @@ -186,7 +186,7 @@ def extract_insn_number_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl yield OperandOffset(i, const), addr -def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction structure offset features @@ -219,7 +219,7 @@ def extract_insn_offset_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl yield OperandOffset(i, op_off), ih.address -def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse referenced byte sequences @@ -234,7 +234,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle yield Bytes(extracted_bytes), ih.address -def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction string features @@ -249,7 +249,7 @@ def extract_insn_string_features(fh: FunctionHandle, bb: BBHandle, ih: InsnHandl def extract_insn_mnemonic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction mnemonic features""" insn: ghidra.program.database.code.InstructionDB = ih.inner @@ -258,7 +258,7 @@ def 
extract_insn_mnemonic_features( def extract_insn_obfs_call_plus_5_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse call $+5 instruction from the given instruction. """ @@ -279,7 +279,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features( def extract_insn_segment_access_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction fs or gs access""" insn: ghidra.program.database.code.InstructionDB = ih.inner @@ -294,7 +294,7 @@ def extract_insn_segment_access_features( def extract_insn_peb_access_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction peb access fs:[0x30] on x86, gs:[0x60] on x64 @@ -310,7 +310,7 @@ def extract_insn_peb_access_characteristic_features( def extract_insn_cross_section_cflow( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """inspect the instruction for a CALL or JMP that crosses section boundaries""" insn: ghidra.program.database.code.InstructionDB = ih.inner @@ -364,7 +364,7 @@ def extract_function_calls_from( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle, -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """extract functions calls from features most relevant at the function scope, however, its most efficient to extract at the instruction scope @@ -393,7 +393,7 @@ def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle, -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """extract indirect function calls (e.g., call eax or call dword ptr [edx+4]) does not include calls like => call 
ds:dword_ABD4974 @@ -442,7 +442,7 @@ def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bb: BBHandle, ih: InsnHandle, -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: f: ghidra.program.database.function.FunctionDB = fh.inner insn: ghidra.program.database.code.InstructionDB = ih.inner @@ -461,7 +461,7 @@ def extract_features( fh: FunctionHandle, bb: BBHandle, insn: InsnHandle, -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: for insn_handler in INSTRUCTION_HANDLERS: for feature, addr in insn_handler(fh, bb, insn): yield feature, addr diff --git a/capa/features/extractors/helpers.py b/capa/features/extractors/helpers.py index 09f76f589..3f1060200 100644 --- a/capa/features/extractors/helpers.py +++ b/capa/features/extractors/helpers.py @@ -8,7 +8,7 @@ import struct import builtins -from typing import Tuple, Iterator +from typing import Iterator MIN_STACKSTRING_LEN = 8 @@ -119,7 +119,7 @@ def twos_complement(val: int, bits: int) -> int: return val -def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[Tuple[int, int]]: +def carve_pe(pbytes: bytes, offset: int = 0) -> Iterator[tuple[int, int]]: """ Generate (offset, key) tuples of embedded PEs diff --git a/capa/features/extractors/ida/basicblock.py b/capa/features/extractors/ida/basicblock.py index 88a1247ea..2a11bb509 100644 --- a/capa/features/extractors/ida/basicblock.py +++ b/capa/features/extractors/ida/basicblock.py @@ -8,7 +8,7 @@ import string import struct -from typing import Tuple, Iterator +from typing import Iterator import idaapi @@ -80,19 +80,19 @@ def bb_contains_stackstring(f: idaapi.func_t, bb: idaapi.BasicBlock) -> bool: return False -def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_stackstring(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract stackstring indicators from basic block""" if 
bb_contains_stackstring(fh.inner, bbh.inner): yield Characteristic("stack string"), bbh.address -def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract tight loop indicators from a basic block""" if capa.features.extractors.ida.helpers.is_basic_block_tight_loop(bbh.inner): yield Characteristic("tight loop"), bbh.address -def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: """extract basic block features""" for bb_handler in BASIC_BLOCK_HANDLERS: for feature, addr in bb_handler(fh, bbh): diff --git a/capa/features/extractors/ida/extractor.py b/capa/features/extractors/ida/extractor.py index a2b4f7913..5222b3c8f 100644 --- a/capa/features/extractors/ida/extractor.py +++ b/capa/features/extractors/ida/extractor.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import List, Tuple, Iterator +from typing import Iterator import idaapi @@ -36,7 +36,7 @@ def __init__(self): sha256=capa.ida.helpers.retrieve_input_file_sha256(), ) ) - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.ida.file.extract_file_format()) self.global_features.extend(capa.features.extractors.ida.global_.extract_os()) self.global_features.extend(capa.features.extractors.ida.global_.extract_arch()) @@ -61,7 +61,7 @@ def get_function(ea: int) -> FunctionHandle: f = idaapi.get_func(ea) return FunctionHandle(address=AbsoluteVirtualAddress(f.start_ea), inner=f) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.ida.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -70,7 +70,7 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: for bb in ida_helpers.get_function_blocks(fh.inner): yield BBHandle(address=AbsoluteVirtualAddress(bb.start_ea), inner=bb) - def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.ida.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: diff --git a/capa/features/extractors/ida/file.py b/capa/features/extractors/ida/file.py index 78200e438..30408060a 100644 --- a/capa/features/extractors/ida/file.py +++ b/capa/features/extractors/ida/file.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import struct -from typing import Tuple, Iterator +from typing import Iterator import idc import idaapi @@ -26,7 +26,7 @@ MAX_OFFSET_PE_AFTER_MZ = 0x200 -def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: +def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[tuple[int, int]]: """check segment for embedded PE adapted for IDA from: @@ -71,7 +71,7 @@ def check_segment_for_pe(seg: idaapi.segment_t) -> Iterator[Tuple[int, int]]: yield off, i -def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: +def extract_file_embedded_pe() -> Iterator[tuple[Feature, Address]]: """extract embedded PE features IDA must load resource sections for this to be complete @@ -83,7 +83,7 @@ def extract_file_embedded_pe() -> Iterator[Tuple[Feature, Address]]: yield Characteristic("embedded pe"), FileOffsetAddress(ea) -def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names() -> Iterator[tuple[Feature, Address]]: """extract function exports""" for _, ordinal, ea, name in idautils.Entries(): forwarded_name = ida_entry.get_entry_forwarder(ordinal) @@ -95,7 +95,7 @@ def extract_file_export_names() -> Iterator[Tuple[Feature, Address]]: yield Characteristic("forwarded export"), AbsoluteVirtualAddress(ea) -def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names() -> Iterator[tuple[Feature, Address]]: """extract function imports 1. 
imports by ordinal: @@ -131,7 +131,7 @@ def extract_file_import_names() -> Iterator[Tuple[Feature, Address]]: yield Import(info[1]), AbsoluteVirtualAddress(ea) -def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names() -> Iterator[tuple[Feature, Address]]: """extract section names IDA must load resource sections for this to be complete @@ -142,7 +142,7 @@ def extract_file_section_names() -> Iterator[Tuple[Feature, Address]]: yield Section(idaapi.get_segm_name(seg)), AbsoluteVirtualAddress(seg.start_ea) -def extract_file_strings() -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings() -> Iterator[tuple[Feature, Address]]: """extract ASCII and UTF-16 LE strings IDA must load resource sections for this to be complete @@ -160,7 +160,7 @@ def extract_file_strings() -> Iterator[Tuple[Feature, Address]]: yield String(s.s), FileOffsetAddress(seg.start_ea + s.offset) -def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: +def extract_file_function_names() -> Iterator[tuple[Feature, Address]]: """ extract the names of statically-linked library functions. 
""" @@ -177,7 +177,7 @@ def extract_file_function_names() -> Iterator[Tuple[Feature, Address]]: yield FunctionName(name[1:]), addr -def extract_file_format() -> Iterator[Tuple[Feature, Address]]: +def extract_file_format() -> Iterator[tuple[Feature, Address]]: filetype = capa.ida.helpers.get_filetype() if filetype in (idaapi.f_PE, idaapi.f_COFF): @@ -191,7 +191,7 @@ def extract_file_format() -> Iterator[Tuple[Feature, Address]]: raise NotImplementedError(f"unexpected file format: {filetype}") -def extract_features() -> Iterator[Tuple[Feature, Address]]: +def extract_features() -> Iterator[tuple[Feature, Address]]: """extract file features""" for file_handler in FILE_HANDLERS: for feature, addr in file_handler(): diff --git a/capa/features/extractors/ida/function.py b/capa/features/extractors/ida/function.py index cb4d63290..f636791da 100644 --- a/capa/features/extractors/ida/function.py +++ b/capa/features/extractors/ida/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Tuple, Iterator +from typing import Iterator import idaapi import idautils @@ -43,7 +43,7 @@ def extract_recursive_call(fh: FunctionHandle): yield Characteristic("recursive call"), fh.address -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): yield feature, addr diff --git a/capa/features/extractors/ida/global_.py b/capa/features/extractors/ida/global_.py index 3c5f4623e..a7724e126 100644 --- a/capa/features/extractors/ida/global_.py +++ b/capa/features/extractors/ida/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging import contextlib -from typing import Tuple, Iterator +from typing import Iterator import ida_loader @@ -19,7 +19,7 @@ logger = logging.getLogger(__name__) -def extract_os() -> Iterator[Tuple[Feature, Address]]: +def extract_os() -> Iterator[tuple[Feature, Address]]: format_name: str = ida_loader.get_file_type_name() if "PE" in format_name: @@ -46,7 +46,7 @@ def extract_os() -> Iterator[Tuple[Feature, Address]]: return -def extract_arch() -> Iterator[Tuple[Feature, Address]]: +def extract_arch() -> Iterator[tuple[Feature, Address]]: procname = capa.ida.helpers.get_processor_name() if procname == "metapc" and capa.ida.helpers.is_64bit(): yield Arch(ARCH_AMD64), NO_ADDRESS diff --git a/capa/features/extractors/ida/helpers.py b/capa/features/extractors/ida/helpers.py index fc22bc38d..dbd2166a8 100644 --- a/capa/features/extractors/ida/helpers.py +++ b/capa/features/extractors/ida/helpers.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import functools -from typing import Any, Dict, Tuple, Iterator, Optional +from typing import Any, Iterator, Optional import idc import idaapi @@ -124,9 +124,9 @@ def inspect_import(imports, library, ea, function, ordinal): return True -def get_file_imports() -> Dict[int, Tuple[str, str, int]]: +def get_file_imports() -> dict[int, tuple[str, str, int]]: """get file imports""" - imports: Dict[int, Tuple[str, str, int]] = {} + imports: dict[int, tuple[str, str, int]] = {} for idx in range(idaapi.get_import_module_qty()): library = idaapi.get_import_module_name(idx) @@ -147,7 +147,7 @@ def get_file_imports() -> Dict[int, Tuple[str, str, int]]: return imports -def get_file_externs() -> Dict[int, Tuple[str, str, int]]: +def get_file_externs() -> dict[int, tuple[str, str, int]]: externs = {} for seg in get_segments(skip_header_segments=True): @@ -248,7 +248,7 @@ def find_string_at(ea: int, min_: int = 4) -> str: return "" -def get_op_phrase_info(op: idaapi.op_t) -> Dict: +def get_op_phrase_info(op: idaapi.op_t) -> dict: """parse phrase features from operand Pretty much dup of sark's implementation: @@ -323,7 +323,7 @@ def is_frame_register(reg: int) -> bool: return reg in (idautils.procregs.sp.reg, idautils.procregs.bp.reg) -def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[Tuple[Any]] = None) -> idaapi.op_t: +def get_insn_ops(insn: idaapi.insn_t, target_ops: Optional[tuple[Any]] = None) -> idaapi.op_t: """yield op_t for instruction, filter on type if specified""" for op in insn.ops: if op.type == idaapi.o_void: diff --git a/capa/features/extractors/ida/insn.py b/capa/features/extractors/ida/insn.py index bd70d0faa..caf90c732 100644 --- a/capa/features/extractors/ida/insn.py +++ b/capa/features/extractors/ida/insn.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import re -from typing import Any, Dict, Tuple, Iterator, Optional +from typing import Any, Iterator, Optional import idc import ida_ua @@ -25,19 +25,19 @@ SECURITY_COOKIE_BYTES_DELTA = 0x40 -def get_imports(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_imports(ctx: dict[str, Any]) -> dict[int, Any]: if "imports_cache" not in ctx: ctx["imports_cache"] = capa.features.extractors.ida.helpers.get_file_imports() return ctx["imports_cache"] -def get_externs(ctx: Dict[str, Any]) -> Dict[int, Any]: +def get_externs(ctx: dict[str, Any]) -> dict[int, Any]: if "externs_cache" not in ctx: ctx["externs_cache"] = capa.features.extractors.ida.helpers.get_file_externs() return ctx["externs_cache"] -def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[Tuple[str, str]]: +def check_for_api_call(insn: idaapi.insn_t, funcs: dict[int, Any]) -> Optional[tuple[str, str]]: """check instruction for API call""" info = None ref = insn.ea @@ -65,7 +65,7 @@ def check_for_api_call(insn: idaapi.insn_t, funcs: Dict[int, Any]) -> Optional[T return info -def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse instruction API features @@ -135,7 +135,7 @@ def extract_insn_api_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) def extract_insn_number_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction number features example: @@ -181,7 +181,7 @@ def extract_insn_number_features( yield OperandOffset(i, const), ih.address -def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ 
parse referenced byte sequences example: @@ -203,7 +203,7 @@ def extract_insn_bytes_features(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_insn_string_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction string features @@ -221,7 +221,7 @@ def extract_insn_string_features( def extract_insn_offset_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction structure offset features @@ -369,7 +369,7 @@ def is_nzxor_stack_cookie(f: idaapi.func_t, bb: idaapi.BasicBlock, insn: idaapi. def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse instruction non-zeroing XOR instruction ignore expected non-zeroing XORs, e.g. security cookies @@ -387,14 +387,14 @@ def extract_insn_nzxor_characteristic_features( def extract_insn_mnemonic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction mnemonic features""" yield Mnemonic(idc.print_insn_mnem(ih.inner.ea)), ih.address def extract_insn_obfs_call_plus_5_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse call $+5 instruction from the given instruction. 
""" @@ -409,7 +409,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features( def extract_insn_peb_access_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction peb access fs:[0x30] on x86, gs:[0x60] on x64 @@ -437,7 +437,7 @@ def extract_insn_peb_access_characteristic_features( def extract_insn_segment_access_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse instruction fs or gs access TODO: @@ -466,7 +466,7 @@ def extract_insn_segment_access_features( def extract_insn_cross_section_cflow( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """inspect the instruction for a CALL or JMP that crosses section boundaries""" insn: idaapi.insn_t = ih.inner @@ -482,7 +482,7 @@ def extract_insn_cross_section_cflow( yield Characteristic("cross section flow"), ih.address -def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract functions calls from features most relevant at the function scope, however, its most efficient to extract at the instruction scope @@ -496,7 +496,7 @@ def extract_function_calls_from(fh: FunctionHandle, bbh: BBHandle, ih: InsnHandl def extract_function_indirect_call_characteristic_features( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """extract indirect function calls (e.g., call eax or call dword ptr [edx+4]) does not include calls like => call ds:dword_ABD4974 @@ -509,7 +509,7 @@ def extract_function_indirect_call_characteristic_features( yield 
Characteristic("indirect call"), ih.address -def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f: FunctionHandle, bbh: BBHandle, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]: """extract instruction features""" for inst_handler in INSTRUCTION_HANDLERS: for feature, ea in inst_handler(f, bbh, insn): diff --git a/capa/features/extractors/null.py b/capa/features/extractors/null.py index a0dd9104d..ad7be0adb 100644 --- a/capa/features/extractors/null.py +++ b/capa/features/extractors/null.py @@ -5,11 +5,9 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Dict, List, Tuple, Union +from typing import Union, TypeAlias from dataclasses import dataclass -from typing_extensions import TypeAlias - from capa.features.common import Feature from capa.features.address import NO_ADDRESS, Address, ThreadAddress, ProcessAddress, DynamicCallAddress from capa.features.extractors.base_extractor import ( @@ -27,19 +25,19 @@ @dataclass class InstructionFeatures: - features: List[Tuple[Address, Feature]] + features: list[tuple[Address, Feature]] @dataclass class BasicBlockFeatures: - features: List[Tuple[Address, Feature]] - instructions: Dict[Address, InstructionFeatures] + features: list[tuple[Address, Feature]] + instructions: dict[Address, InstructionFeatures] @dataclass class FunctionFeatures: - features: List[Tuple[Address, Feature]] - basic_blocks: Dict[Address, BasicBlockFeatures] + features: list[tuple[Address, Feature]] + basic_blocks: dict[Address, BasicBlockFeatures] @dataclass @@ -52,9 +50,9 @@ class NullStaticFeatureExtractor(StaticFeatureExtractor): base_address: Address sample_hashes: SampleHashes - 
global_features: List[Feature] - file_features: List[Tuple[Address, Feature]] - functions: Dict[Address, FunctionFeatures] + global_features: list[Feature] + file_features: list[tuple[Address, Feature]] + functions: dict[Address, FunctionFeatures] def get_base_address(self): return self.base_address @@ -98,19 +96,19 @@ def extract_insn_features(self, f, bb, insn): @dataclass class CallFeatures: name: str - features: List[Tuple[Address, Feature]] + features: list[tuple[Address, Feature]] @dataclass class ThreadFeatures: - features: List[Tuple[Address, Feature]] - calls: Dict[Address, CallFeatures] + features: list[tuple[Address, Feature]] + calls: dict[Address, CallFeatures] @dataclass class ProcessFeatures: - features: List[Tuple[Address, Feature]] - threads: Dict[Address, ThreadFeatures] + features: list[tuple[Address, Feature]] + threads: dict[Address, ThreadFeatures] name: str @@ -118,9 +116,9 @@ class ProcessFeatures: class NullDynamicFeatureExtractor(DynamicFeatureExtractor): base_address: Address sample_hashes: SampleHashes - global_features: List[Feature] - file_features: List[Tuple[Address, Feature]] - processes: Dict[Address, ProcessFeatures] + global_features: list[Feature] + file_features: list[tuple[Address, Feature]] + processes: dict[Address, ProcessFeatures] def extract_global_features(self): for feature in self.global_features: diff --git a/capa/features/extractors/pefile.py b/capa/features/extractors/pefile.py index 1dd478adf..cac7ecc42 100644 --- a/capa/features/extractors/pefile.py +++ b/capa/features/extractors/pefile.py @@ -148,11 +148,11 @@ def extract_file_features(pe, buf): buf: the raw sample bytes yields: - Tuple[Feature, VA]: a feature and its location. + tuple[Feature, VA]: a feature and its location. 
""" for file_handler in FILE_HANDLERS: - # file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]] + # file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]] for feature, va in file_handler(pe=pe, buf=buf): # type: ignore yield feature, va @@ -177,10 +177,10 @@ def extract_global_features(pe, buf): buf: the raw sample bytes yields: - Tuple[Feature, VA]: a feature and its location. + tuple[Feature, VA]: a feature and its location. """ for handler in GLOBAL_HANDLERS: - # file_handler: type: (pe, bytes) -> Iterable[Tuple[Feature, Address]] + # file_handler: type: (pe, bytes) -> Iterable[tuple[Feature, Address]] for feature, va in handler(pe=pe, buf=buf): # type: ignore yield feature, va diff --git a/capa/features/extractors/viv/basicblock.py b/capa/features/extractors/viv/basicblock.py index 2e450fb69..3515c29cf 100644 --- a/capa/features/extractors/viv/basicblock.py +++ b/capa/features/extractors/viv/basicblock.py @@ -8,7 +8,7 @@ import string import struct -from typing import Tuple, Iterator +from typing import Iterator import envi import envi.archs.i386.disasm @@ -20,7 +20,7 @@ from capa.features.extractors.base_extractor import BBHandle, FunctionHandle -def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def interface_extract_basic_block_XXX(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """ parse features from the given basic block. 
@@ -47,7 +47,7 @@ def _bb_has_tight_loop(f, bb): return False -def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_bb_tight_loop(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """check basic block for tight loop indicators""" if _bb_has_tight_loop(f, bb.inner): yield Characteristic("tight loop"), bb.address @@ -70,7 +70,7 @@ def _bb_has_stackstring(f, bb): return False -def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_stackstring(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """check basic block for stackstring indicators""" if _bb_has_stackstring(f, bb.inner): yield Characteristic("stack string"), bb.address @@ -145,7 +145,7 @@ def is_printable_utf16le(chars: bytes) -> bool: return False -def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[tuple[Feature, Address]]: """ extract features from the given basic block. @@ -154,7 +154,7 @@ def extract_features(f: FunctionHandle, bb: BBHandle) -> Iterator[Tuple[Feature, bb (viv_utils.BasicBlock): the basic block to process. yields: - Tuple[Feature, int]: the features and their location found in this basic block. + tuple[Feature, int]: the features and their location found in this basic block. """ yield BasicBlock(), AbsoluteVirtualAddress(bb.inner.va) for bb_handler in BASIC_BLOCK_HANDLERS: diff --git a/capa/features/extractors/viv/extractor.py b/capa/features/extractors/viv/extractor.py index 001e9e35a..ad64858ff 100644 --- a/capa/features/extractors/viv/extractor.py +++ b/capa/features/extractors/viv/extractor.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Any, Dict, List, Tuple, Iterator +from typing import Any, Iterator from pathlib import Path import viv_utils @@ -39,7 +39,7 @@ def __init__(self, vw, path: Path, os): super().__init__(hashes=SampleHashes.from_bytes(self.buf)) # pre-compute these because we'll yield them at *every* scope. - self.global_features: List[Tuple[Feature, Address]] = [] + self.global_features: list[tuple[Feature, Address]] = [] self.global_features.extend(capa.features.extractors.viv.file.extract_file_format(self.buf)) self.global_features.extend(capa.features.extractors.common.extract_os(self.buf, os)) self.global_features.extend(capa.features.extractors.viv.global_.extract_arch(self.vw)) @@ -55,13 +55,13 @@ def extract_file_features(self): yield from capa.features.extractors.viv.file.extract_features(self.vw, self.buf) def get_functions(self) -> Iterator[FunctionHandle]: - cache: Dict[str, Any] = {} + cache: dict[str, Any] = {} for va in sorted(self.vw.getFunctions()): yield FunctionHandle( address=AbsoluteVirtualAddress(va), inner=viv_utils.Function(self.vw, va), ctx={"cache": cache} ) - def extract_function_features(self, fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_function_features(self, fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.viv.function.extract_features(fh) def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: @@ -69,7 +69,7 @@ def get_basic_blocks(self, fh: FunctionHandle) -> Iterator[BBHandle]: for bb in f.basic_blocks: yield BBHandle(address=AbsoluteVirtualAddress(bb.va), inner=bb) - def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[Tuple[Feature, Address]]: + def extract_basic_block_features(self, fh: FunctionHandle, bbh) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.viv.basicblock.extract_features(fh, bbh) def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHandle]: 
@@ -79,7 +79,7 @@ def get_instructions(self, fh: FunctionHandle, bbh: BBHandle) -> Iterator[InsnHa def extract_insn_features( self, fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.viv.insn.extract_features(fh, bbh, ih) def is_library_function(self, addr): diff --git a/capa/features/extractors/viv/file.py b/capa/features/extractors/viv/file.py index 2fc09841b..41ce836b6 100644 --- a/capa/features/extractors/viv/file.py +++ b/capa/features/extractors/viv/file.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Tuple, Iterator +from typing import Iterator import PE.carve as pe_carve # vivisect PE import vivisect @@ -21,7 +21,7 @@ from capa.features.address import Address, FileOffsetAddress, AbsoluteVirtualAddress -def extract_file_embedded_pe(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_embedded_pe(buf, **kwargs) -> Iterator[tuple[Feature, Address]]: for offset, _ in pe_carve.carve(buf, 1): yield Characteristic("embedded pe"), FileOffsetAddress(offset) @@ -37,7 +37,7 @@ def get_first_vw_filename(vw: vivisect.VivWorkspace): return next(iter(vw.filemeta.keys())) -def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[tuple[Feature, Address]]: for va, _, name, _ in vw.getExports(): yield Export(name), AbsoluteVirtualAddress(va) @@ -56,7 +56,7 @@ def extract_file_export_names(vw: vivisect.VivWorkspace, **kwargs) -> Iterator[T yield Characteristic("forwarded export"), AbsoluteVirtualAddress(va) -def 
extract_file_import_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_import_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]: """ extract imported function names 1. imports by ordinal: @@ -91,16 +91,16 @@ def is_viv_ord_impname(impname: str) -> bool: return True -def extract_file_section_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_section_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]: for va, _, segname, _ in vw.getSegments(): yield Section(segname), AbsoluteVirtualAddress(va) -def extract_file_strings(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(buf, **kwargs) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_file_strings(buf) -def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_function_names(vw, **kwargs) -> Iterator[tuple[Feature, Address]]: """ extract the names of statically-linked library functions. """ @@ -117,11 +117,11 @@ def extract_file_function_names(vw, **kwargs) -> Iterator[Tuple[Feature, Address yield FunctionName(name[1:]), addr -def extract_file_format(buf, **kwargs) -> Iterator[Tuple[Feature, Address]]: +def extract_file_format(buf, **kwargs) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_format(buf) -def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]: +def extract_features(vw, buf: bytes) -> Iterator[tuple[Feature, Address]]: """ extract file features from given workspace @@ -130,7 +130,7 @@ def extract_features(vw, buf: bytes) -> Iterator[Tuple[Feature, Address]]: buf: the raw input file bytes yields: - Tuple[Feature, Address]: a feature and its location. + tuple[Feature, Address]: a feature and its location. 
""" for file_handler in FILE_HANDLERS: diff --git a/capa/features/extractors/viv/function.py b/capa/features/extractors/viv/function.py index ab1dcb429..9cc1e2168 100644 --- a/capa/features/extractors/viv/function.py +++ b/capa/features/extractors/viv/function.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Tuple, Iterator +from typing import Iterator import envi import viv_utils @@ -19,7 +19,7 @@ from capa.features.extractors.base_extractor import FunctionHandle -def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: """ parse features from the given function. @@ -32,7 +32,7 @@ def interface_extract_function_XXX(fh: FunctionHandle) -> Iterator[Tuple[Feature raise NotImplementedError -def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: if fh.inner.vw.metadata["Format"] == "elf": # the file's symbol table gets added to the metadata of the vivisect workspace. # this is in order to eliminate the computational overhead of refetching symtab each time. 
@@ -54,13 +54,13 @@ def extract_function_symtab_names(fh: FunctionHandle) -> Iterator[Tuple[Feature, yield FunctionName(sym_name), fh.address -def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_to(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]: f: viv_utils.Function = fhandle.inner for src, _, _, _ in f.vw.getXrefsTo(f.va, rtype=vivisect.const.REF_CODE): yield Characteristic("calls to"), AbsoluteVirtualAddress(src) -def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_loop(fhandle: FunctionHandle) -> Iterator[tuple[Feature, Address]]: """ parse if a function has a loop """ @@ -88,7 +88,7 @@ def extract_function_loop(fhandle: FunctionHandle) -> Iterator[Tuple[Feature, Ad yield Characteristic("loop"), fhandle.address -def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_features(fh: FunctionHandle) -> Iterator[tuple[Feature, Address]]: """ extract features from the given function. @@ -96,7 +96,7 @@ def extract_features(fh: FunctionHandle) -> Iterator[Tuple[Feature, Address]]: fh: the function handle from which to extract features yields: - Tuple[Feature, int]: the features and their location found in this function. + tuple[Feature, int]: the features and their location found in this function. """ for func_handler in FUNCTION_HANDLERS: for feature, addr in func_handler(fh): diff --git a/capa/features/extractors/viv/global_.py b/capa/features/extractors/viv/global_.py index 39ee79bb2..4f1970a89 100644 --- a/capa/features/extractors/viv/global_.py +++ b/capa/features/extractors/viv/global_.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import ARCH_I386, ARCH_AMD64, Arch, Feature from capa.features.address import NO_ADDRESS, Address @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -def extract_arch(vw) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(vw) -> Iterator[tuple[Feature, Address]]: arch = vw.getMeta("Architecture") if arch == "amd64": yield Arch(ARCH_AMD64), NO_ADDRESS diff --git a/capa/features/extractors/viv/indirect_calls.py b/capa/features/extractors/viv/indirect_calls.py index d39f08c91..6646d8716 100644 --- a/capa/features/extractors/viv/indirect_calls.py +++ b/capa/features/extractors/viv/indirect_calls.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import collections -from typing import Set, List, Deque, Tuple, Optional +from typing import Deque, Optional import envi import vivisect.const @@ -28,7 +28,7 @@ DESTRUCTIVE_MNEMONICS = ("mov", "lea", "pop", "xor") -def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]: +def get_previous_instructions(vw: VivWorkspace, va: int) -> list[int]: """ collect the instructions that flow to the given address, local to the current function. @@ -37,7 +37,7 @@ def get_previous_instructions(vw: VivWorkspace, va: int) -> List[int]: va (int): the virtual address to inspect returns: - List[int]: the prior instructions, which may fallthrough and/or jump here + list[int]: the prior instructions, which may fallthrough and/or jump here """ ret = [] @@ -71,7 +71,7 @@ class NotFoundError(Exception): pass -def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[int]]: +def find_definition(vw: VivWorkspace, va: int, reg: int) -> tuple[int, Optional[int]]: """ scan backwards from the given address looking for assignments to the given register. if a constant, return that value. 
@@ -88,7 +88,7 @@ def find_definition(vw: VivWorkspace, va: int, reg: int) -> Tuple[int, Optional[ NotFoundError: when the definition cannot be found. """ q: Deque[int] = collections.deque() - seen: Set[int] = set() + seen: set[int] = set() q.extend(get_previous_instructions(vw, va)) while q: @@ -139,7 +139,7 @@ def is_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> bool: return insn.mnem in ("call", "jmp") and isinstance(insn.opers[0], envi.archs.i386.disasm.i386RegOper) -def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> Tuple[int, Optional[int]]: +def resolve_indirect_call(vw: VivWorkspace, va: int, insn: envi.Opcode) -> tuple[int, Optional[int]]: """ inspect the given indirect call instruction and attempt to resolve the target address. diff --git a/capa/features/extractors/viv/insn.py b/capa/features/extractors/viv/insn.py index 329bc94d0..2964db834 100644 --- a/capa/features/extractors/viv/insn.py +++ b/capa/features/extractors/viv/insn.py @@ -5,7 +5,7 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import List, Tuple, Callable, Iterator +from typing import Callable, Iterator import envi import envi.exc @@ -33,7 +33,7 @@ def interface_extract_instruction_XXX( fh: FunctionHandle, bbh: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse features from the given instruction. 
@@ -53,7 +53,7 @@ def get_imports(vw): caching accessor to vivisect workspace imports avoids performance issues in vivisect when collecting locations - returns: Dict[int, Tuple[str, str]] + returns: dict[int, tuple[str, str]] """ if "imports" in vw.metadata: return vw.metadata["imports"] @@ -65,7 +65,7 @@ def get_imports(vw): return imports -def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_api_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse API features from the given instruction. @@ -260,7 +260,7 @@ def read_bytes(vw, va: int) -> bytes: raise -def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_bytes_features(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse byte sequence features from the given instruction. example: @@ -371,7 +371,7 @@ def is_security_cookie(f, bb, insn) -> bool: def extract_insn_nzxor_characteristic_features( fh: FunctionHandle, bbhandle: BBHandle, ih: InsnHandle -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """ parse non-zeroing XOR instruction from the given instruction. ignore expected non-zeroing XORs, e.g. security cookies. 
@@ -392,12 +392,12 @@ def extract_insn_nzxor_characteristic_features( yield Characteristic("nzxor"), ih.address -def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_mnemonic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse mnemonic features from the given instruction.""" yield Mnemonic(ih.inner.mnem), ih.address -def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse call $+5 instruction from the given instruction. """ @@ -415,7 +415,7 @@ def extract_insn_obfs_call_plus_5_characteristic_features(f, bb, ih: InsnHandle) yield Characteristic("call $+5"), ih.address -def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ parse peb access from the given function. fs:[0x30] on x86, gs:[0x60] on x64 """ @@ -451,7 +451,7 @@ def extract_insn_peb_access_characteristic_features(f, bb, ih: InsnHandle) -> It pass -def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_segment_access_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """parse the instruction for access to fs or gs""" insn: envi.Opcode = ih.inner @@ -472,7 +472,7 @@ def get_section(vw, va: int): raise KeyError(va) -def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ inspect the instruction for a CALL or JMP that crosses section boundaries. 
""" @@ -513,7 +513,7 @@ def extract_insn_cross_section_cflow(fh: FunctionHandle, bb, ih: InsnHandle) -> # this is a feature that's most relevant at the function scope, # however, its most efficient to extract at the instruction scope. -def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: insn: envi.Opcode = ih.inner f: viv_utils.Function = fh.inner @@ -554,7 +554,7 @@ def extract_function_calls_from(fh: FunctionHandle, bb, ih: InsnHandle) -> Itera # this is a feature that's most relevant at the function or basic block scope, # however, its most efficient to extract at the instruction scope. -def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle) -> Iterator[tuple[Feature, Address]]: """ extract indirect function call characteristic (e.g., call eax or call dword ptr [edx+4]) does not include calls like => call ds:dword_ABD4974 @@ -578,7 +578,7 @@ def extract_function_indirect_call_characteristic_features(f, bb, ih: InsnHandle def extract_op_number_features( fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse number features from the given operand. 
example: @@ -623,7 +623,7 @@ def extract_op_number_features( def extract_op_offset_features( fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse structure offset features from the given operand.""" # example: # @@ -674,7 +674,7 @@ def extract_op_offset_features( def extract_op_string_features( fh: FunctionHandle, bb, ih: InsnHandle, i, oper: envi.Operand -) -> Iterator[Tuple[Feature, Address]]: +) -> Iterator[tuple[Feature, Address]]: """parse string features from the given operand.""" # example: # @@ -705,15 +705,15 @@ def extract_op_string_features( yield String(s), ih.address -def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterator[tuple[Feature, Address]]: for i, oper in enumerate(insn.inner.opers): for op_handler in OPERAND_HANDLERS: for feature, addr in op_handler(f, bb, insn, i, oper): yield feature, addr -OPERAND_HANDLERS: List[ - Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[Tuple[Feature, Address]]] +OPERAND_HANDLERS: list[ + Callable[[FunctionHandle, BBHandle, InsnHandle, int, envi.Operand], Iterator[tuple[Feature, Address]]] ] = [ extract_op_number_features, extract_op_offset_features, @@ -721,7 +721,7 @@ def extract_operand_features(f: FunctionHandle, bb, insn: InsnHandle) -> Iterato ] -def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]: +def extract_features(f, bb, insn) -> Iterator[tuple[Feature, Address]]: """ extract features from the given insn. @@ -731,14 +731,14 @@ def extract_features(f, bb, insn) -> Iterator[Tuple[Feature, Address]]: insn (vivisect...Instruction): the instruction to process. yields: - Tuple[Feature, Address]: the features and their location found in this insn. + tuple[Feature, Address]: the features and their location found in this insn. 
""" for insn_handler in INSTRUCTION_HANDLERS: for feature, addr in insn_handler(f, bb, insn): yield feature, addr -INSTRUCTION_HANDLERS: List[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[Tuple[Feature, Address]]]] = [ +INSTRUCTION_HANDLERS: list[Callable[[FunctionHandle, BBHandle, InsnHandle], Iterator[tuple[Feature, Address]]]] = [ extract_insn_api_features, extract_insn_bytes_features, extract_insn_nzxor_characteristic_features, diff --git a/capa/features/extractors/vmray/__init__.py b/capa/features/extractors/vmray/__init__.py index 4a004af61..a8976cd8c 100644 --- a/capa/features/extractors/vmray/__init__.py +++ b/capa/features/extractors/vmray/__init__.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Dict, List, Tuple, Optional +from typing import Optional from pathlib import Path from zipfile import ZipFile from collections import defaultdict @@ -58,17 +58,17 @@ def __init__(self, zipfile_path: Path): "VMRay feature extractor does not support flog version %s" % self.flog.analysis.log_version ) - self.exports: Dict[int, str] = {} - self.imports: Dict[int, Tuple[str, str]] = {} - self.sections: Dict[int, str] = {} - self.monitor_processes: Dict[int, VMRayMonitorProcess] = {} - self.monitor_threads: Dict[int, VMRayMonitorThread] = {} + self.exports: dict[int, str] = {} + self.imports: dict[int, tuple[str, str]] = {} + self.sections: dict[int, str] = {} + self.monitor_processes: dict[int, VMRayMonitorProcess] = {} + self.monitor_threads: dict[int, VMRayMonitorThread] = {} # map monitor thread IDs to their associated monitor process ID - self.monitor_threads_by_monitor_process: Dict[int, List[int]] = defaultdict(list) + self.monitor_threads_by_monitor_process: dict[int, list[int]] = defaultdict(list) # map function calls to their 
associated monitor thread ID mapped to its associated monitor process ID - self.monitor_process_calls: Dict[int, Dict[int, List[FunctionCall]]] = defaultdict(lambda: defaultdict(list)) + self.monitor_process_calls: dict[int, dict[int, list[FunctionCall]]] = defaultdict(lambda: defaultdict(list)) self.base_address: int diff --git a/capa/features/extractors/vmray/call.py b/capa/features/extractors/vmray/call.py index febb1b338..6ded3a4fc 100644 --- a/capa/features/extractors/vmray/call.py +++ b/capa/features/extractors/vmray/call.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import logging -from typing import Tuple, Iterator +from typing import Iterator import capa.features.extractors.helpers from capa.features.insn import API, Number @@ -18,7 +18,7 @@ logger = logging.getLogger(__name__) -def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: if param.deref is not None: # pointer types contain a special "deref" member that stores the deref'd value # so we check for this first and ignore Param.value as this always contains the @@ -39,7 +39,7 @@ def get_call_param_features(param: Param, ch: CallHandle) -> Iterator[Tuple[Feat yield Number(hexint(param.value)), ch.address -def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[Tuple[Feature, Address]]: +def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: call: FunctionCall = ch.inner if call.params_in: @@ -50,7 +50,7 @@ def extract_call_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) - yield API(name), ch.address -def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> 
Iterator[Tuple[Feature, Address]]: +def extract_features(ph: ProcessHandle, th: ThreadHandle, ch: CallHandle) -> Iterator[tuple[Feature, Address]]: for handler in CALL_HANDLERS: for feature, addr in handler(ph, th, ch): yield feature, addr diff --git a/capa/features/extractors/vmray/extractor.py b/capa/features/extractors/vmray/extractor.py index 36a0b430f..a9f0491c9 100644 --- a/capa/features/extractors/vmray/extractor.py +++ b/capa/features/extractors/vmray/extractor.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. -from typing import List, Tuple, Iterator +from typing import Iterator from pathlib import Path import capa.helpers @@ -34,8 +34,8 @@ ) -def get_formatted_params(params: ParamList) -> List[str]: - params_list: List[str] = [] +def get_formatted_params(params: ParamList) -> list[str]: + params_list: list[str] = [] for param in params: if param.deref and param.deref.value is not None: @@ -69,10 +69,10 @@ def get_base_address(self) -> Address: # value according to the PE header, the actual trace may use a different imagebase return AbsoluteVirtualAddress(self.analysis.base_address) - def extract_file_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_file_features(self) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.vmray.file.extract_features(self.analysis) - def extract_global_features(self) -> Iterator[Tuple[Feature, Address]]: + def extract_global_features(self) -> Iterator[tuple[Feature, Address]]: yield from self.global_features def get_processes(self) -> Iterator[ProcessHandle]: @@ -80,7 +80,7 @@ def get_processes(self) -> Iterator[ProcessHandle]: address: ProcessAddress = ProcessAddress(pid=monitor_process.pid, ppid=monitor_process.ppid) yield ProcessHandle(address, inner=monitor_process) - def extract_process_features(self, ph: ProcessHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_process_features(self, ph: ProcessHandle) 
-> Iterator[tuple[Feature, Address]]: # we have not identified process-specific features for VMRay yet yield from [] @@ -95,7 +95,7 @@ def get_threads(self, ph: ProcessHandle) -> Iterator[ThreadHandle]: address: ThreadAddress = ThreadAddress(process=ph.address, tid=monitor_thread.tid) yield ThreadHandle(address=address, inner=monitor_thread) - def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[Tuple[Feature, Address]]: + def extract_thread_features(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[tuple[Feature, Address]]: if False: # force this routine to be a generator, # but we don't actually have any elements to generate. @@ -109,7 +109,7 @@ def get_calls(self, ph: ProcessHandle, th: ThreadHandle) -> Iterator[CallHandle] def extract_call_features( self, ph: ProcessHandle, th: ThreadHandle, ch: CallHandle - ) -> Iterator[Tuple[Feature, Address]]: + ) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.vmray.call.extract_features(ph, th, ch) def get_call_name(self, ph, th, ch) -> str: diff --git a/capa/features/extractors/vmray/file.py b/capa/features/extractors/vmray/file.py index 7f4ba0395..b0e1772f2 100644 --- a/capa/features/extractors/vmray/file.py +++ b/capa/features/extractors/vmray/file.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator import capa.features.extractors.common from capa.features.file import Export, Import, Section @@ -18,52 +18,52 @@ logger = logging.getLogger(__name__) -def extract_export_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_export_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for addr, name in analysis.exports.items(): yield Export(name), AbsoluteVirtualAddress(addr) -def extract_import_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_import_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for addr, (module, api) in analysis.imports.items(): for symbol in generate_symbols(module, api, include_dll=True): yield Import(symbol), AbsoluteVirtualAddress(addr) -def extract_section_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_section_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for addr, name in analysis.sections.items(): yield Section(name), AbsoluteVirtualAddress(addr) -def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_filenames(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for filename in analysis.sv2.filenames.values(): yield String(filename.filename), NO_ADDRESS -def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_mutex_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for mutex in analysis.sv2.mutexes.values(): yield String(mutex.name), NO_ADDRESS -def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_domain_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for domain in analysis.sv2.domains.values(): yield String(domain.domain), NO_ADDRESS -def 
extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_ip_addresses(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for ip_address in analysis.sv2.ip_addresses.values(): yield String(ip_address.ip_address), NO_ADDRESS -def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_referenced_registry_key_names(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for registry_record in analysis.sv2.registry_records.values(): yield String(registry_record.reg_key_name), NO_ADDRESS -def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_file_strings(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: yield from capa.features.extractors.common.extract_file_strings(analysis.sample_file_buf) -def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for handler in FILE_HANDLERS: for feature, addr in handler(analysis): yield feature, addr diff --git a/capa/features/extractors/vmray/global_.py b/capa/features/extractors/vmray/global_.py index a42ce511e..c923a87fc 100644 --- a/capa/features/extractors/vmray/global_.py +++ b/capa/features/extractors/vmray/global_.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import logging -from typing import Tuple, Iterator +from typing import Iterator from capa.features.common import ( OS, @@ -27,7 +27,7 @@ logger = logging.getLogger(__name__) -def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_arch(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: file_type: str = analysis.file_type if "x86-32" in file_type: @@ -38,7 +38,7 @@ def extract_arch(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: raise ValueError("unrecognized arch from the VMRay report: %s" % file_type) -def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_format(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: assert analysis.sample_file_static_data is not None if analysis.sample_file_static_data.pe: yield Format(FORMAT_PE), NO_ADDRESS @@ -48,7 +48,7 @@ def extract_format(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]] raise ValueError("unrecognized file format from the VMRay report: %s" % analysis.file_type) -def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_os(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: file_type: str = analysis.file_type if "windows" in file_type.lower(): @@ -59,7 +59,7 @@ def extract_os(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: raise ValueError("unrecognized OS from the VMRay report: %s" % file_type) -def extract_features(analysis: VMRayAnalysis) -> Iterator[Tuple[Feature, Address]]: +def extract_features(analysis: VMRayAnalysis) -> Iterator[tuple[Feature, Address]]: for global_handler in GLOBAL_HANDLER: for feature, addr in global_handler(analysis): yield feature, addr diff --git a/capa/features/extractors/vmray/models.py b/capa/features/extractors/vmray/models.py index f5371bec1..c2d6551aa 100644 --- a/capa/features/extractors/vmray/models.py +++ b/capa/features/extractors/vmray/models.py @@ -6,11 +6,10 @@ # is distributed on an 
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -from typing import Dict, List, Union, Optional +from typing import Union, Optional, Annotated import xmltodict from pydantic import Field, BaseModel -from typing_extensions import Annotated from pydantic.functional_validators import BeforeValidator """ @@ -87,7 +86,7 @@ class Param(BaseModel): deref: Optional[ParamDeref] = None -def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]: +def validate_ensure_is_list(value: Union[list[Param], Param]) -> list[Param]: if isinstance(value, list): return value else: @@ -95,9 +94,9 @@ def validate_ensure_is_list(value: Union[List[Param], Param]) -> List[Param]: # params may be stored as a list of Param or a single Param so we convert -# the input value to Python list type before the inner validation (List[Param]) +# the input value to Python list type before the inner validation (list[Param]) # is called -ParamList = Annotated[List[Param], BeforeValidator(validate_ensure_is_list)] +ParamList = Annotated[list[Param], BeforeValidator(validate_ensure_is_list)] class Params(BaseModel): @@ -164,9 +163,9 @@ class MonitorThread(BaseModel): # handle if there's only single entries, but the model expects a list -MonitorProcessList = Annotated[List[MonitorProcess], BeforeValidator(validate_ensure_is_list)] -MonitorThreadList = Annotated[List[MonitorThread], BeforeValidator(validate_ensure_is_list)] -FunctionCallList = Annotated[List[FunctionCall], BeforeValidator(validate_ensure_is_list)] +MonitorProcessList = Annotated[list[MonitorProcess], BeforeValidator(validate_ensure_is_list)] +MonitorThreadList = Annotated[list[MonitorThread], BeforeValidator(validate_ensure_is_list)] +FunctionCallList = Annotated[list[FunctionCall], BeforeValidator(validate_ensure_is_list)] class Analysis(BaseModel): @@ -177,7 +176,7 @@ class 
Analysis(BaseModel): monitor_processes: MonitorProcessList = Field(alias="monitor_process", default=[]) monitor_threads: MonitorThreadList = Field(alias="monitor_thread", default=[]) function_calls: FunctionCallList = Field(alias="fncall", default=[]) - # function_returns: List[FunctionReturn] = Field(alias="fnret", default=[]) + # function_returns: list[FunctionReturn] = Field(alias="fnret", default=[]) class Flog(BaseModel): @@ -186,7 +185,7 @@ class Flog(BaseModel): # models for summary_v2.json file, certain fields left as comments for documentation purposes class GenericReference(BaseModel): - path: List[str] + path: list[str] source: str @@ -226,12 +225,12 @@ class PEFileImport(BaseModel): class PEFileImportModule(BaseModel): dll: str - apis: List[PEFileImport] + apis: list[PEFileImport] class PEFileSection(BaseModel): # entropy: float - # flags: List[str] = [] + # flags: list[str] = [] name: str # raw_data_offset: int # raw_data_size: int @@ -241,9 +240,9 @@ class PEFileSection(BaseModel): class PEFile(BaseModel): basic_info: PEFileBasicInfo - exports: List[PEFileExport] = [] - imports: List[PEFileImportModule] = [] - sections: List[PEFileSection] = [] + exports: list[PEFileExport] = [] + imports: list[PEFileImportModule] = [] + sections: list[PEFileSection] = [] class ElfFileSectionHeader(BaseModel): @@ -268,7 +267,7 @@ class ElfFileHeader(BaseModel): class ElfFile(BaseModel): # file_header: ElfFileHeader - sections: List[ElfFileSection] + sections: list[ElfFileSection] class StaticData(BaseModel): @@ -284,7 +283,7 @@ class FileHashes(BaseModel): class File(BaseModel): - # categories: List[str] + # categories: list[str] hash_values: FileHashes # is_artifact: bool # is_ioc: bool @@ -292,11 +291,11 @@ class File(BaseModel): # size: int # is_truncated: bool # mime_type: Optional[str] = None - # operations: List[str] = [] - # ref_filenames: List[GenericReference] = [] - # ref_gfncalls: List[GenericReference] = [] + # operations: list[str] = [] + # ref_filenames: 
list[GenericReference] = [] + # ref_gfncalls: list[GenericReference] = [] ref_static_data: Optional[StaticDataReference] = None - # ref_vti_matches: List[GenericReference] = [] + # ref_vti_matches: list[GenericReference] = [] # verdict: str @@ -356,13 +355,13 @@ class AnalysisMetadata(BaseModel): class SummaryV2(BaseModel): analysis_metadata: AnalysisMetadata - static_data: Dict[str, StaticData] = {} + static_data: dict[str, StaticData] = {} # recorded artifacts - files: Dict[str, File] = {} - processes: Dict[str, Process] = {} - filenames: Dict[str, Filename] = {} - mutexes: Dict[str, Mutex] = {} - domains: Dict[str, Domain] = {} - ip_addresses: Dict[str, IPAddress] = {} - registry_records: Dict[str, Registry] = {} + files: dict[str, File] = {} + processes: dict[str, Process] = {} + filenames: dict[str, Filename] = {} + mutexes: dict[str, Mutex] = {} + domains: dict[str, Domain] = {} + ip_addresses: dict[str, IPAddress] = {} + registry_records: dict[str, Registry] = {} diff --git a/capa/features/freeze/__init__.py b/capa/features/freeze/__init__.py index ec0d6f609..bb6b3ded0 100644 --- a/capa/features/freeze/__init__.py +++ b/capa/features/freeze/__init__.py @@ -14,14 +14,10 @@ import zlib import logging from enum import Enum -from typing import List, Tuple, Union, Literal +from typing import Union, Literal, TypeAlias from pydantic import Field, BaseModel, ConfigDict -# TODO(williballenthin): use typing.TypeAlias directly in Python 3.10+ -# https://github.com/mandiant/capa/issues/1699 -from typing_extensions import TypeAlias - import capa.helpers import capa.version import capa.features.file @@ -62,7 +58,7 @@ class AddressType(str, Enum): class Address(HashableModel): type: AddressType - value: Union[int, Tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS + value: Union[int, tuple[int, ...], None] = None # None default value to support deserialization of NO_ADDRESS @classmethod def from_capa(cls, a: 
capa.features.address.Address) -> "Address": @@ -272,52 +268,52 @@ class InstructionFeature(HashableModel): class InstructionFeatures(BaseModel): address: Address - features: Tuple[InstructionFeature, ...] + features: tuple[InstructionFeature, ...] class BasicBlockFeatures(BaseModel): address: Address - features: Tuple[BasicBlockFeature, ...] - instructions: Tuple[InstructionFeatures, ...] + features: tuple[BasicBlockFeature, ...] + instructions: tuple[InstructionFeatures, ...] class FunctionFeatures(BaseModel): address: Address - features: Tuple[FunctionFeature, ...] - basic_blocks: Tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks") + features: tuple[FunctionFeature, ...] + basic_blocks: tuple[BasicBlockFeatures, ...] = Field(alias="basic blocks") model_config = ConfigDict(populate_by_name=True) class CallFeatures(BaseModel): address: Address name: str - features: Tuple[CallFeature, ...] + features: tuple[CallFeature, ...] class ThreadFeatures(BaseModel): address: Address - features: Tuple[ThreadFeature, ...] - calls: Tuple[CallFeatures, ...] + features: tuple[ThreadFeature, ...] + calls: tuple[CallFeatures, ...] class ProcessFeatures(BaseModel): address: Address name: str - features: Tuple[ProcessFeature, ...] - threads: Tuple[ThreadFeatures, ...] + features: tuple[ProcessFeature, ...] + threads: tuple[ThreadFeatures, ...] class StaticFeatures(BaseModel): - global_: Tuple[GlobalFeature, ...] = Field(alias="global") - file: Tuple[FileFeature, ...] - functions: Tuple[FunctionFeatures, ...] + global_: tuple[GlobalFeature, ...] = Field(alias="global") + file: tuple[FileFeature, ...] + functions: tuple[FunctionFeatures, ...] model_config = ConfigDict(populate_by_name=True) class DynamicFeatures(BaseModel): - global_: Tuple[GlobalFeature, ...] = Field(alias="global") - file: Tuple[FileFeature, ...] - processes: Tuple[ProcessFeatures, ...] + global_: tuple[GlobalFeature, ...] = Field(alias="global") + file: tuple[FileFeature, ...] 
+ processes: tuple[ProcessFeatures, ...] model_config = ConfigDict(populate_by_name=True) @@ -344,7 +340,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: """ serialize the given extractor to a string """ - global_features: List[GlobalFeature] = [] + global_features: list[GlobalFeature] = [] for feature, _ in extractor.extract_global_features(): global_features.append( GlobalFeature( @@ -352,7 +348,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: ) ) - file_features: List[FileFeature] = [] + file_features: list[FileFeature] = [] for feature, address in extractor.extract_file_features(): file_features.append( FileFeature( @@ -361,7 +357,7 @@ def dumps_static(extractor: StaticFeatureExtractor) -> str: ) ) - function_features: List[FunctionFeatures] = [] + function_features: list[FunctionFeatures] = [] for f in extractor.get_functions(): faddr = Address.from_capa(f.address) ffeatures = [ @@ -446,7 +442,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str: """ serialize the given extractor to a string """ - global_features: List[GlobalFeature] = [] + global_features: list[GlobalFeature] = [] for feature, _ in extractor.extract_global_features(): global_features.append( GlobalFeature( @@ -454,7 +450,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str: ) ) - file_features: List[FileFeature] = [] + file_features: list[FileFeature] = [] for feature, address in extractor.extract_file_features(): file_features.append( FileFeature( @@ -463,7 +459,7 @@ def dumps_dynamic(extractor: DynamicFeatureExtractor) -> str: ) ) - process_features: List[ProcessFeatures] = [] + process_features: list[ProcessFeatures] = [] for p in extractor.get_processes(): paddr = Address.from_capa(p.address) pname = extractor.get_process_name(p) diff --git a/capa/ghidra/README.md b/capa/ghidra/README.md index 30a5695b7..b6596c577 100644 --- a/capa/ghidra/README.md +++ b/capa/ghidra/README.md @@ -55,7 +55,7 @@ You can also execute 
[capa_ghidra.py](https://raw.githubusercontent.com/mandiant | capa | `>= 7.0.0` | https://github.com/mandiant/capa/releases | | Ghidrathon | `>= 3.0.0` | https://github.com/mandiant/Ghidrathon/releases | | Ghidra | `>= 10.3.2` | https://github.com/NationalSecurityAgency/ghidra/releases | -| Python | `>= 3.8.0` | https://www.python.org/downloads | +| Python | `>= 3.10.0` | https://www.python.org/downloads | ## Installation diff --git a/capa/ghidra/capa_explorer.py b/capa/ghidra/capa_explorer.py index 4628b6752..0fe5243c8 100644 --- a/capa/ghidra/capa_explorer.py +++ b/capa/ghidra/capa_explorer.py @@ -13,7 +13,7 @@ import json import logging import pathlib -from typing import Any, Dict, List +from typing import Any from ghidra.app.cmd.label import AddLabelCmd, CreateNamespacesCmd from ghidra.program.model.symbol import Namespace, SourceType, SymbolType @@ -68,8 +68,8 @@ def __init__( scope, capability, matches, - attack: List[Dict[Any, Any]], - mbc: List[Dict[Any, Any]], + attack: list[dict[Any, Any]], + mbc: list[dict[Any, Any]], ): self.namespace = namespace self.scope = scope @@ -282,7 +282,7 @@ def parse_json(capa_data): for rule, capability in capa_data.get("rules", {}).items(): # structure to contain rule match address & supporting feature data # {rule match addr:[{feature addr:{node_data}}]} - rule_matches: Dict[Any, List[Any]] = {} + rule_matches: dict[Any, list[Any]] = {} for i in range(len(capability.get("matches"))): # grab rule match location match_loc = capability.get("matches")[i][0].get("value") @@ -368,14 +368,10 @@ def main(): if __name__ == "__main__": - if sys.version_info < (3, 8): + if sys.version_info < (3, 10): from capa.exceptions import UnsupportedRuntimeError - raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+") - elif sys.version_info < (3, 10): - from warnings import warn - - warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2) + raise 
UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+") exit_code = main() if exit_code != 0: popup("capa explorer encountered errors during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821 diff --git a/capa/ghidra/capa_ghidra.py b/capa/ghidra/capa_ghidra.py index 817924930..db43ecfac 100644 --- a/capa/ghidra/capa_ghidra.py +++ b/capa/ghidra/capa_ghidra.py @@ -160,12 +160,8 @@ def main(): if __name__ == "__main__": - if sys.version_info < (3, 8): + if sys.version_info < (3, 10): from capa.exceptions import UnsupportedRuntimeError - raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+") - elif sys.version_info < (3, 10): - from warnings import warn - - warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2) + raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+") sys.exit(main()) diff --git a/capa/ghidra/helpers.py b/capa/ghidra/helpers.py index 44af2f017..77c1ec14b 100644 --- a/capa/ghidra/helpers.py +++ b/capa/ghidra/helpers.py @@ -8,7 +8,6 @@ import logging import datetime import contextlib -from typing import List from pathlib import Path import capa @@ -112,7 +111,7 @@ def get_file_sha256(): return currentProgram().getExecutableSHA256() # type: ignore [name-defined] # noqa: F821 -def collect_metadata(rules: List[Path]): +def collect_metadata(rules: list[Path]): md5 = get_file_md5() sha256 = get_file_sha256() diff --git a/capa/helpers.py b/capa/helpers.py index 4505647c4..b1c9c2801 100644 --- a/capa/helpers.py +++ b/capa/helpers.py @@ -14,7 +14,7 @@ import tempfile import contextlib import importlib.util -from typing import Dict, List, Union, BinaryIO, Iterator, NoReturn +from typing import BinaryIO, Iterator, NoReturn from pathlib import Path from zipfile import ZipFile from datetime import datetime @@ -164,7 +164,7 @@ def load_json_from_path(json_path: Path): 
return report -def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): +def decode_json_lines(fd: BinaryIO | gzip.GzipFile): for line in fd: try: line_s = line.strip().decode() @@ -175,7 +175,7 @@ def decode_json_lines(fd: Union[BinaryIO, gzip.GzipFile]): logger.debug("bad DRAKVUF log line: %s", line) -def load_jsonl_from_path(jsonl_path: Path) -> Iterator[Dict]: +def load_jsonl_from_path(jsonl_path: Path) -> Iterator[dict]: try: with gzip.open(jsonl_path, "rb") as fg: yield from decode_json_lines(fg) @@ -204,7 +204,7 @@ def get_format_from_report(sample: Path) -> str: return FORMAT_DRAKVUF elif sample.name.endswith(".zip"): with ZipFile(sample, "r") as zipfile: - namelist: List[str] = zipfile.namelist() + namelist: list[str] = zipfile.namelist() if "logs/summary_v2.json" in namelist and "logs/flog.xml" in namelist: # assume VMRay zipfile at a minimum has these files return FORMAT_VMRAY @@ -331,17 +331,6 @@ def log_unsupported_arch_error(): logger.error("-" * 80) -def log_unsupported_runtime_error(): - logger.error("-" * 80) - logger.error(" Unsupported runtime or Python interpreter.") - logger.error(" ") - logger.error(" capa supports running under Python 3.8 and higher.") - logger.error(" ") - logger.error(" If you're seeing this message on the command line,") - logger.error(" please ensure you're running a supported Python version.") - logger.error("-" * 80) - - def is_running_standalone() -> bool: """ are we running from a PyInstaller'd executable? 
diff --git a/capa/ida/helpers.py b/capa/ida/helpers.py index 066e8605d..df231c496 100644 --- a/capa/ida/helpers.py +++ b/capa/ida/helpers.py @@ -8,7 +8,7 @@ import logging import datetime import contextlib -from typing import List, Optional +from typing import Optional from pathlib import Path import idc @@ -165,7 +165,7 @@ def get_file_sha256(): return sha256 -def collect_metadata(rules: List[Path]): +def collect_metadata(rules: list[Path]): """ """ md5 = get_file_md5() sha256 = get_file_sha256() diff --git a/capa/ida/plugin/README.md b/capa/ida/plugin/README.md index 0af39a4ad..e904b7adf 100644 --- a/capa/ida/plugin/README.md +++ b/capa/ida/plugin/README.md @@ -96,7 +96,7 @@ can update using the `Settings` button. ### Requirements -capa explorer supports Python versions >= 3.8.x and IDA Pro versions >= 7.4. The following IDA Pro versions have been tested: +capa explorer supports Python versions >= 3.10 and IDA Pro versions >= 7.4. The following IDA Pro versions have been tested: * IDA 7.4 * IDA 7.5 @@ -105,8 +105,9 @@ capa explorer supports Python versions >= 3.8.x and IDA Pro versions >= 7.4. The * IDA 8.0 * IDA 8.1 * IDA 8.2 +* IDA 9.0 -capa explorer is however limited to the Python versions supported by your IDA installation (which may not include all Python versions >= 3.8.x). +capa explorer is however limited to the Python versions supported by your IDA installation (which may not include all Python versions >= 3.10). If you encounter issues with your specific setup, please open a new [Issue](https://github.com/mandiant/capa/issues). 
diff --git a/capa/ida/plugin/cache.py b/capa/ida/plugin/cache.py index 42910ee56..1473b5046 100644 --- a/capa/ida/plugin/cache.py +++ b/capa/ida/plugin/cache.py @@ -10,7 +10,7 @@ import itertools import collections -from typing import Set, Dict, Tuple, Union, Optional +from typing import Union, Optional import capa.engine from capa.rules import Scope, RuleSet @@ -34,7 +34,7 @@ def __init__( self.parent.children.add(self) self.features: FeatureSet = collections.defaultdict(set) - self.children: Set[CapaRuleGenFeatureCacheNode] = set() + self.children: set[CapaRuleGenFeatureCacheNode] = set() def __hash__(self): # TODO(mike-hunhoff): confirm this is unique enough @@ -55,9 +55,9 @@ def __init__(self, extractor: CapaExplorerFeatureExtractor): self.global_features: FeatureSet = collections.defaultdict(set) self.file_node: CapaRuleGenFeatureCacheNode = CapaRuleGenFeatureCacheNode(None, None) - self.func_nodes: Dict[Address, CapaRuleGenFeatureCacheNode] = {} - self.bb_nodes: Dict[Address, CapaRuleGenFeatureCacheNode] = {} - self.insn_nodes: Dict[Address, CapaRuleGenFeatureCacheNode] = {} + self.func_nodes: dict[Address, CapaRuleGenFeatureCacheNode] = {} + self.bb_nodes: dict[Address, CapaRuleGenFeatureCacheNode] = {} + self.insn_nodes: dict[Address, CapaRuleGenFeatureCacheNode] = {} self._find_global_features() self._find_file_features() @@ -115,7 +115,7 @@ def _find_function_and_below_features(self, fh: FunctionHandle): def _find_instruction_capabilities( self, ruleset: RuleSet, insn: CapaRuleGenFeatureCacheNode - ) -> Tuple[FeatureSet, MatchResults]: + ) -> tuple[FeatureSet, MatchResults]: features: FeatureSet = collections.defaultdict(set) for feature, locs in itertools.chain(insn.features.items(), self.global_features.items()): @@ -131,7 +131,7 @@ def _find_instruction_capabilities( def _find_basic_block_capabilities( self, ruleset: RuleSet, bb: CapaRuleGenFeatureCacheNode - ) -> Tuple[FeatureSet, MatchResults, MatchResults]: + ) -> tuple[FeatureSet, MatchResults, 
MatchResults]: features: FeatureSet = collections.defaultdict(set) insn_matches: MatchResults = collections.defaultdict(list) @@ -155,7 +155,7 @@ def _find_basic_block_capabilities( def find_code_capabilities( self, ruleset: RuleSet, fh: FunctionHandle - ) -> Tuple[FeatureSet, MatchResults, MatchResults, MatchResults]: + ) -> tuple[FeatureSet, MatchResults, MatchResults, MatchResults]: f_node: Optional[CapaRuleGenFeatureCacheNode] = self._get_cached_func_node(fh) if f_node is None: return {}, {}, {}, {} @@ -179,7 +179,7 @@ def find_code_capabilities( _, function_matches = ruleset.match(Scope.FUNCTION, function_features, f_node.address) return function_features, function_matches, bb_matches, insn_matches - def find_file_capabilities(self, ruleset: RuleSet) -> Tuple[FeatureSet, MatchResults]: + def find_file_capabilities(self, ruleset: RuleSet) -> tuple[FeatureSet, MatchResults]: features: FeatureSet = collections.defaultdict(set) for func_node in self.file_node.children: diff --git a/capa/ida/plugin/form.py b/capa/ida/plugin/form.py index 028ce2078..54bd70409 100644 --- a/capa/ida/plugin/form.py +++ b/capa/ida/plugin/form.py @@ -10,7 +10,7 @@ import itertools import collections from enum import IntFlag -from typing import Any, List, Optional +from typing import Any, Optional from pathlib import Path import idaapi @@ -1146,7 +1146,7 @@ def set_rulegen_preview_border_success(self): def update_rule_status(self, rule_text: str): """ """ rule: capa.rules.Rule - rules: List[Rule] + rules: list[Rule] ruleset: capa.rules.RuleSet if self.view_rulegen_editor.invisibleRootItem().childCount() == 0: diff --git a/capa/ida/plugin/item.py b/capa/ida/plugin/item.py index b2be1c141..4e8f1738a 100644 --- a/capa/ida/plugin/item.py +++ b/capa/ida/plugin/item.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import codecs -from typing import List, Iterator, Optional +from typing import Iterator, Optional import idc import idaapi @@ -36,11 +36,11 @@ def ea_to_hex(ea): class CapaExplorerDataItem: """store data for CapaExplorerDataModel""" - def __init__(self, parent: Optional["CapaExplorerDataItem"], data: List[str], can_check=True): + def __init__(self, parent: Optional["CapaExplorerDataItem"], data: list[str], can_check=True): """initialize item""" self.pred = parent self._data = data - self._children: List["CapaExplorerDataItem"] = [] + self._children: list["CapaExplorerDataItem"] = [] self._checked = False self._can_check = can_check diff --git a/capa/ida/plugin/model.py b/capa/ida/plugin/model.py index c3b41670c..0d8221b12 100644 --- a/capa/ida/plugin/model.py +++ b/capa/ida/plugin/model.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
-from typing import Set, Dict, List, Tuple, Optional +from typing import Optional from collections import deque import idc @@ -354,7 +354,7 @@ def render_capa_doc_statement_node( parent: CapaExplorerDataItem, match: rd.Match, statement: rd.Statement, - locations: List[Address], + locations: list[Address], doc: rd.ResultDocument, ): """render capa statement read from doc @@ -447,9 +447,9 @@ def render_capa_doc_match(self, parent: CapaExplorerDataItem, match: rd.Match, d def render_capa_doc_by_function(self, doc: rd.ResultDocument): """render rule matches by function meaning each rule match is nested under function where it was found""" - matches_by_function: Dict[AbsoluteVirtualAddress, Tuple[CapaExplorerFunctionItem, Set[str]]] = {} + matches_by_function: dict[AbsoluteVirtualAddress, tuple[CapaExplorerFunctionItem, set[str]]] = {} for rule in rutils.capability_rules(doc): - match_eas: List[int] = [] + match_eas: list[int] = [] # initial pass of rule matches for addr_, _ in rule.matches: @@ -560,7 +560,7 @@ def render_capa_doc_feature_node( parent: CapaExplorerDataItem, match: rd.Match, feature: frzf.Feature, - locations: List[Address], + locations: list[Address], doc: rd.ResultDocument, ): """process capa doc feature node diff --git a/capa/ida/plugin/view.py b/capa/ida/plugin/view.py index b93c31a8c..d5aa113be 100644 --- a/capa/ida/plugin/view.py +++ b/capa/ida/plugin/view.py @@ -6,7 +6,7 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. 
import re -from typing import Dict, Optional +from typing import Optional from collections import Counter import idc @@ -1019,7 +1019,7 @@ def new_leaf_node(self, parent, data, feature=None): return o - def load_features(self, file_features, func_features: Optional[Dict] = None): + def load_features(self, file_features, func_features: Optional[dict] = None): """ """ self.parse_features_for_tree(self.new_parent_node(self, ("File Scope",)), file_features) if func_features: diff --git a/capa/loader.py b/capa/loader.py index f481d7b8d..700d1a3ba 100644 --- a/capa/loader.py +++ b/capa/loader.py @@ -10,7 +10,7 @@ import logging import datetime import contextlib -from typing import Set, Dict, List, Optional +from typing import Optional from pathlib import Path from rich.console import Console @@ -128,7 +128,7 @@ def get_meta_str(vw): return f"{', '.join(meta)}, number of functions: {len(vw.getFunctions())}" -def get_workspace(path: Path, input_format: str, sigpaths: List[Path]): +def get_workspace(path: Path, input_format: str, sigpaths: list[Path]): """ load the program at the given path into a vivisect workspace using the given format. also apply the given FLIRT signatures. @@ -198,7 +198,7 @@ def get_extractor( input_format: str, os_: str, backend: str, - sigpaths: List[Path], + sigpaths: list[Path], should_save_workspace=False, disable_progress=False, sample_path: Optional[Path] = None, @@ -346,7 +346,7 @@ def get_extractor( raise ValueError("unexpected backend: " + backend) -def _get_binexport2_file_extractors(input_file: Path) -> List[FeatureExtractor]: +def _get_binexport2_file_extractors(input_file: Path) -> list[FeatureExtractor]: # I'm not sure this is where this logic should live, but it works for now. # we'll keep this a "private" routine until we're sure. 
import capa.features.extractors.binexport2 @@ -368,8 +368,8 @@ def _get_binexport2_file_extractors(input_file: Path) -> List[FeatureExtractor]: return [] -def get_file_extractors(input_file: Path, input_format: str) -> List[FeatureExtractor]: - file_extractors: List[FeatureExtractor] = [] +def get_file_extractors(input_file: Path, input_format: str) -> list[FeatureExtractor]: + file_extractors: list[FeatureExtractor] = [] # we use lazy importing here to avoid eagerly loading dependencies # that some specialized environments may not have, @@ -416,11 +416,11 @@ def get_file_extractors(input_file: Path, input_format: str) -> List[FeatureExtr return file_extractors -def get_signatures(sigs_path: Path) -> List[Path]: +def get_signatures(sigs_path: Path) -> list[Path]: if not sigs_path.exists(): raise IOError(f"signatures path {sigs_path} does not exist or cannot be accessed") - paths: List[Path] = [] + paths: list[Path] = [] if sigs_path.is_file(): paths.append(sigs_path) elif sigs_path.is_dir(): @@ -478,11 +478,11 @@ def get_sample_analysis(format_, arch, os_, extractor, rules_path, counts): def collect_metadata( - argv: List[str], + argv: list[str], input_path: Path, input_format: str, os_: str, - rules_path: List[Path], + rules_path: list[Path], extractor: FeatureExtractor, counts: dict, ) -> rdoc.Metadata: @@ -545,7 +545,7 @@ def compute_dynamic_layout( """ assert isinstance(extractor, DynamicFeatureExtractor) - matched_calls: Set[Address] = set() + matched_calls: set[Address] = set() def result_rec(result: capa.features.common.Result): for loc in result.locations: @@ -558,14 +558,14 @@ def result_rec(result: capa.features.common.Result): for _, result in matches: result_rec(result) - names_by_process: Dict[Address, str] = {} - names_by_call: Dict[Address, str] = {} + names_by_process: dict[Address, str] = {} + names_by_call: dict[Address, str] = {} - matched_processes: Set[Address] = set() - matched_threads: Set[Address] = set() + matched_processes: set[Address] = 
set() + matched_threads: set[Address] = set() - threads_by_process: Dict[Address, List[Address]] = {} - calls_by_thread: Dict[Address, List[Address]] = {} + threads_by_process: dict[Address, list[Address]] = {} + calls_by_thread: dict[Address, list[Address]] = {} for p in extractor.get_processes(): threads_by_process[p.address] = [] @@ -625,8 +625,8 @@ def compute_static_layout(rules: RuleSet, extractor: StaticFeatureExtractor, cap otherwise, we may pollute the json document with a large amount of un-referenced data. """ - functions_by_bb: Dict[Address, Address] = {} - bbs_by_function: Dict[Address, List[Address]] = {} + functions_by_bb: dict[Address, Address] = {} + bbs_by_function: dict[Address, list[Address]] = {} for f in extractor.get_functions(): bbs_by_function[f.address] = [] for bb in extractor.get_basic_blocks(f): diff --git a/capa/main.py b/capa/main.py index 60c5d638a..46619a66a 100644 --- a/capa/main.py +++ b/capa/main.py @@ -17,7 +17,7 @@ import textwrap import contextlib from types import TracebackType -from typing import Any, Set, Dict, List, Optional, TypedDict +from typing import Any, Optional, TypedDict from pathlib import Path import colorama @@ -129,8 +129,8 @@ class FilterConfig(TypedDict, total=False): - processes: Set[int] - functions: Set[int] + processes: set[int] + functions: set[int] @contextlib.contextmanager @@ -170,7 +170,7 @@ def get_default_root() -> Path: return Path(__file__).resolve().parent.parent -def get_default_signatures() -> List[Path]: +def get_default_signatures() -> list[Path]: """ compute a list of file system paths to the default FLIRT signatures. 
""" @@ -185,15 +185,11 @@ def get_default_signatures() -> List[Path]: return ret -def simple_message_exception_handler(exctype, value: BaseException, traceback: TracebackType): +def simple_message_exception_handler( + exctype: type[BaseException], value: BaseException, traceback: TracebackType | None +): """ prints friendly message on unexpected exceptions to regular users (debug mode shows regular stack trace) - - args: - # TODO(aaronatp): Once capa drops support for Python 3.8, move the exctype type annotation to - # the function parameters and remove the "# type: ignore[assignment]" from the relevant place - # in the main function, see (https://github.com/mandiant/capa/issues/1896) - exctype (type[BaseException]): exception class """ if exctype is KeyboardInterrupt: @@ -218,7 +214,7 @@ def install_common_args(parser, wanted=None): args: parser (argparse.ArgumentParser): a parser to update in place, adding common arguments. - wanted (Set[str]): collection of arguments to opt-into, including: + wanted (set[str]): collection of arguments to opt-into, including: - "input_file": required positional argument to input file. - "format": flag to override file format. - "os": flag to override file operating system. 
@@ -455,13 +451,13 @@ def handle_common_args(args): raise RuntimeError("unexpected --color value: " + args.color) if not args.debug: - sys.excepthook = simple_message_exception_handler # type: ignore[assignment] + sys.excepthook = simple_message_exception_handler if hasattr(args, "input_file"): args.input_file = Path(args.input_file) if hasattr(args, "rules"): - rules_paths: List[Path] = [] + rules_paths: list[Path] = [] if args.rules == [RULES_PATH_DEFAULT_STRING]: logger.debug("-" * 80) @@ -699,7 +695,7 @@ def get_rules_from_cli(args) -> RuleSet: return rules -def get_file_extractors_from_cli(args, input_format: str) -> List[FeatureExtractor]: +def get_file_extractors_from_cli(args, input_format: str) -> list[FeatureExtractor]: """ args: args: The parsed command line arguments from `install_common_args`. @@ -745,7 +741,7 @@ def get_file_extractors_from_cli(args, input_format: str) -> List[FeatureExtract raise ShouldExitError(E_INVALID_FILE_TYPE) from e -def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: List[FeatureExtractor]) -> bool: +def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: list[FeatureExtractor]) -> bool: """ args: args: The parsed command line arguments from `install_common_args`. 
@@ -780,7 +776,7 @@ def find_file_limitations_from_cli(args, rules: RuleSet, file_extractors: List[F return found_file_limitation -def get_signatures_from_cli(args, input_format: str, backend: str) -> List[Path]: +def get_signatures_from_cli(args, input_format: str, backend: str) -> list[Path]: if backend != BACKEND_VIV: logger.debug("skipping library code matching: only supported by the vivisect backend") return [] @@ -900,13 +896,9 @@ def apply_extractor_filters(extractor: FeatureExtractor, extractor_filters: Filt raise ShouldExitError(E_INVALID_FEATURE_EXTRACTOR) -def main(argv: Optional[List[str]] = None): - if sys.version_info < (3, 8): - raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+") - elif sys.version_info < (3, 10): - from warnings import warn - - warn("This is the last capa version supporting Python 3.8 and 3.9.", DeprecationWarning, stacklevel=2) +def main(argv: Optional[list[str]] = None): + if sys.version_info < (3, 10): + raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.10+") if argv is None: argv = sys.argv[1:] @@ -975,7 +967,7 @@ def main(argv: Optional[List[str]] = None): meta: rdoc.Metadata capabilities: MatchResults - counts: Dict[str, Any] + counts: dict[str, Any] if input_format == FORMAT_RESULT: # result document directly parses into meta, capabilities diff --git a/capa/perf.py b/capa/perf.py index 2dcdb4603..38962222f 100644 --- a/capa/perf.py +++ b/capa/perf.py @@ -5,11 +5,10 @@ # Unless required by applicable law or agreed to in writing, software distributed under the License # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. -import typing import collections # this structure is unstable and may change before the next major release. 
-counters: typing.Counter[str] = collections.Counter() +counters: collections.Counter[str] = collections.Counter() def reset(): diff --git a/capa/render/proto/__init__.py b/capa/render/proto/__init__.py index ed4c690e1..dbde5949c 100644 --- a/capa/render/proto/__init__.py +++ b/capa/render/proto/__init__.py @@ -25,7 +25,7 @@ Alternatively, --pyi_out=. can be used to generate a Python Interface file that supports development """ import datetime -from typing import Any, Dict, Union +from typing import Any, Union import google.protobuf.json_format @@ -553,7 +553,7 @@ def rule_metadata_to_pb2(rule_metadata: rd.RuleMetadata) -> capa_pb2.RuleMetadat def doc_to_pb2(doc: rd.ResultDocument) -> capa_pb2.ResultDocument: - rule_matches: Dict[str, capa_pb2.RuleMatches] = {} + rule_matches: dict[str, capa_pb2.RuleMatches] = {} for rule_name, matches in doc.rules.items(): m = capa_pb2.RuleMatches( meta=rule_metadata_to_pb2(matches.meta), @@ -977,7 +977,7 @@ def rule_metadata_from_pb2(pb: capa_pb2.RuleMetadata) -> rd.RuleMetadata: def doc_from_pb2(doc: capa_pb2.ResultDocument) -> rd.ResultDocument: - rule_matches: Dict[str, rd.RuleMatches] = {} + rule_matches: dict[str, rd.RuleMatches] = {} for rule_name, matches in doc.rules.items(): m = rd.RuleMatches( meta=rule_metadata_from_pb2(matches.meta), diff --git a/capa/render/result_document.py b/capa/render/result_document.py index ab6b03979..8aece5c9c 100644 --- a/capa/render/result_document.py +++ b/capa/render/result_document.py @@ -8,11 +8,10 @@ import datetime import collections from enum import Enum -from typing import Dict, List, Tuple, Union, Literal, Optional +from typing import Union, Literal, Optional, TypeAlias from pathlib import Path from pydantic import Field, BaseModel, ConfigDict -from typing_extensions import TypeAlias import capa.rules import capa.engine @@ -46,7 +45,7 @@ class BasicBlockLayout(Model): class FunctionLayout(Model): address: frz.Address - matched_basic_blocks: Tuple[BasicBlockLayout, ...] 
+ matched_basic_blocks: tuple[BasicBlockLayout, ...] class CallLayout(Model): @@ -56,21 +55,21 @@ class CallLayout(Model): class ThreadLayout(Model): address: frz.Address - matched_calls: Tuple[CallLayout, ...] + matched_calls: tuple[CallLayout, ...] class ProcessLayout(Model): address: frz.Address name: str - matched_threads: Tuple[ThreadLayout, ...] + matched_threads: tuple[ThreadLayout, ...] class StaticLayout(Model): - functions: Tuple[FunctionLayout, ...] + functions: tuple[FunctionLayout, ...] class DynamicLayout(Model): - processes: Tuple[ProcessLayout, ...] + processes: tuple[ProcessLayout, ...] Layout: TypeAlias = Union[StaticLayout, DynamicLayout] @@ -93,12 +92,12 @@ class ProcessFeatureCount(Model): class StaticFeatureCounts(Model): file: int - functions: Tuple[FunctionFeatureCount, ...] + functions: tuple[FunctionFeatureCount, ...] class DynamicFeatureCounts(Model): file: int - processes: Tuple[ProcessFeatureCount, ...] + processes: tuple[ProcessFeatureCount, ...] FeatureCounts: TypeAlias = Union[StaticFeatureCounts, DynamicFeatureCounts] @@ -109,11 +108,11 @@ class StaticAnalysis(Model): arch: str os: str extractor: str - rules: Tuple[str, ...] + rules: tuple[str, ...] base_address: frz.Address layout: StaticLayout feature_counts: StaticFeatureCounts - library_functions: Tuple[LibraryFunction, ...] + library_functions: tuple[LibraryFunction, ...] class DynamicAnalysis(Model): @@ -121,7 +120,7 @@ class DynamicAnalysis(Model): arch: str os: str extractor: str - rules: Tuple[str, ...] + rules: tuple[str, ...] 
layout: DynamicLayout feature_counts: DynamicFeatureCounts @@ -137,7 +136,7 @@ class Flavor(str, Enum): class Metadata(Model): timestamp: datetime.datetime version: str - argv: Optional[Tuple[str, ...]] + argv: Optional[tuple[str, ...]] sample: Sample flavor: Flavor analysis: Analysis @@ -254,7 +253,7 @@ def node_from_capa(node: Union[capa.engine.Statement, capa.engine.Feature]) -> N def node_to_capa( - node: Node, children: List[Union[capa.engine.Statement, capa.engine.Feature]] + node: Node, children: list[Union[capa.engine.Statement, capa.engine.Feature]] ) -> Union[capa.engine.Statement, capa.engine.Feature]: if isinstance(node, StatementNode): if isinstance(node.statement, CompoundStatement): @@ -313,9 +312,9 @@ class Match(FrozenModel): success: bool node: Node - children: Tuple["Match", ...] - locations: Tuple[frz.Address, ...] - captures: Dict[str, Tuple[frz.Address, ...]] + children: tuple["Match", ...] + locations: tuple[frz.Address, ...] + captures: dict[str, tuple[frz.Address, ...]] @classmethod def from_capa( @@ -435,7 +434,7 @@ def from_capa( captures={capture: tuple(captures[capture]) for capture in captures}, ) - def to_capa(self, rules_by_name: Dict[str, capa.rules.Rule]) -> capa.engine.Result: + def to_capa(self, rules_by_name: dict[str, capa.rules.Rule]) -> capa.engine.Result: children = [child.to_capa(rules_by_name) for child in self.children] statement = node_to_capa(self.node, [child.statement for child in children]) @@ -492,7 +491,7 @@ class AttackSpec(FrozenModel): id: like `Identifier` above, perhaps "T1059.006" """ - parts: Tuple[str, ...] + parts: tuple[str, ...] tactic: str technique: str subtechnique: str @@ -532,7 +531,7 @@ class MBCSpec(FrozenModel): id: like `Identifier` above, perhaps "E1056.m01" """ - parts: Tuple[str, ...] + parts: tuple[str, ...] 
objective: str behavior: str method: str @@ -572,12 +571,12 @@ class MaecMetadata(FrozenModel): class RuleMetadata(FrozenModel): name: str namespace: Optional[str] = None - authors: Tuple[str, ...] + authors: tuple[str, ...] scopes: capa.rules.Scopes - attack: Tuple[AttackSpec, ...] = Field(alias="att&ck") - mbc: Tuple[MBCSpec, ...] - references: Tuple[str, ...] - examples: Tuple[str, ...] + attack: tuple[AttackSpec, ...] = Field(alias="att&ck") + mbc: tuple[MBCSpec, ...] + references: tuple[str, ...] + examples: tuple[str, ...] description: str lib: bool = Field(False, alias="lib") @@ -621,16 +620,16 @@ class RuleMatches(FrozenModel): meta: RuleMetadata source: str - matches: Tuple[Tuple[frz.Address, Match], ...] + matches: tuple[tuple[frz.Address, Match], ...] class ResultDocument(FrozenModel): meta: Metadata - rules: Dict[str, RuleMatches] + rules: dict[str, RuleMatches] @classmethod def from_capa(cls, meta: Metadata, rules: RuleSet, capabilities: MatchResults) -> "ResultDocument": - rule_matches: Dict[str, RuleMatches] = {} + rule_matches: dict[str, RuleMatches] = {} for rule_name, matches in capabilities.items(): rule = rules[rule_name] @@ -648,8 +647,8 @@ def from_capa(cls, meta: Metadata, rules: RuleSet, capabilities: MatchResults) - return ResultDocument(meta=meta, rules=rule_matches) - def to_capa(self) -> Tuple[Metadata, Dict]: - capabilities: Dict[str, List[Tuple[capa.features.address.Address, capa.features.common.Result]]] = ( + def to_capa(self) -> tuple[Metadata, dict]: + capabilities: dict[str, list[tuple[capa.features.address.Address, capa.features.common.Result]]] = ( collections.defaultdict(list) ) diff --git a/capa/render/utils.py b/capa/render/utils.py index 73ed1d296..6f42f249a 100644 --- a/capa/render/utils.py +++ b/capa/render/utils.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. 
import io -from typing import Dict, List, Tuple, Union, Iterator, Optional +from typing import Union, Iterator, Optional import rich.console from rich.progress import Text @@ -41,7 +41,7 @@ def format_parts_id(data: Union[rd.AttackSpec, rd.MBCSpec]): return f"{'::'.join(data.parts)} [{data.id}]" -def sort_rules(rules: Dict[str, rd.RuleMatches]) -> List[Tuple[Optional[str], str, rd.RuleMatches]]: +def sort_rules(rules: dict[str, rd.RuleMatches]) -> list[tuple[Optional[str], str, rd.RuleMatches]]: """Sort rules by namespace and name.""" return sorted((rule.meta.namespace or "", rule.meta.name, rule) for rule in rules.values()) diff --git a/capa/render/vverbose.py b/capa/render/vverbose.py index 9c45119e5..c021bfbb0 100644 --- a/capa/render/vverbose.py +++ b/capa/render/vverbose.py @@ -7,7 +7,7 @@ # See the License for the specific language governing permissions and limitations under the License. import logging import textwrap -from typing import Dict, Iterable, Optional +from typing import Iterable, Optional from rich.text import Text from rich.table import Table @@ -323,7 +323,7 @@ def render_rules(console: Console, doc: rd.ResultDocument): """ import capa.render.verbose as v - functions_by_bb: Dict[capa.features.address.Address, capa.features.address.Address] = {} + functions_by_bb: dict[capa.features.address.Address, capa.features.address.Address] = {} if isinstance(doc.meta.analysis, rd.StaticAnalysis): for finfo in doc.meta.analysis.layout.functions: faddress = finfo.address.to_capa() diff --git a/capa/rules/__init__.py b/capa/rules/__init__.py index 1d7f13ed1..11c0c0c2a 100644 --- a/capa/rules/__init__.py +++ b/capa/rules/__init__.py @@ -26,7 +26,7 @@ # https://github.com/python/mypy/issues/1153 from backports.functools_lru_cache import lru_cache # type: ignore -from typing import Any, Set, Dict, List, Tuple, Union, Callable, Iterator, Optional, cast +from typing import Any, Union, Callable, Iterator, Optional, cast from dataclasses import asdict, dataclass 
import yaml @@ -132,10 +132,10 @@ def __repr__(self) -> str: raise ValueError("invalid rules class. at least one scope must be specified") @classmethod - def from_dict(self, scopes: Dict[str, str]) -> "Scopes": + def from_dict(self, scopes: dict[str, str]) -> "Scopes": # make local copy so we don't make changes outside of this routine. # we'll use the value None to indicate the scope is not supported. - scopes_: Dict[str, Optional[str]] = dict(scopes) + scopes_: dict[str, Optional[str]] = dict(scopes) # mark non-specified scopes as invalid if "static" not in scopes_: @@ -168,7 +168,7 @@ def from_dict(self, scopes: Dict[str, str]) -> "Scopes": ) -SUPPORTED_FEATURES: Dict[str, Set] = { +SUPPORTED_FEATURES: dict[str, set] = { Scope.GLOBAL: { # these will be added to other scopes, see below. capa.features.common.OS, @@ -297,7 +297,7 @@ def __repr__(self): def ensure_feature_valid_for_scopes(scopes: Scopes, feature: Union[Feature, Statement]): # construct a dict of all supported features - supported_features: Set = set() + supported_features: set = set() if scopes.static: supported_features.update(SUPPORTED_FEATURES[scopes.static]) if scopes.dynamic: @@ -322,12 +322,12 @@ def ensure_feature_valid_for_scopes(scopes: Scopes, feature: Union[Feature, Stat def translate_com_feature(com_name: str, com_type: ComType) -> ceng.Statement: com_db = capa.features.com.load_com_database(com_type) - guids: Optional[List[str]] = com_db.get(com_name) + guids: Optional[list[str]] = com_db.get(com_name) if not guids: logger.error(" %s doesn't exist in COM %s database", com_name, com_type) raise InvalidRule(f"'{com_name}' doesn't exist in COM {com_type} database") - com_features: List[Feature] = [] + com_features: list[Feature] = [] for guid in guids: hex_chars = guid.replace("-", "") h = [hex_chars[i : i + 2] for i in range(0, len(hex_chars), 2)] @@ -823,11 +823,11 @@ def build_statements(d, scopes: Scopes): return feature -def first(s: List[Any]) -> Any: +def first(s: list[Any]) -> Any: 
return s[0] -def second(s: List[Any]) -> Any: +def second(s: list[Any]) -> Any: return s[1] @@ -853,13 +853,13 @@ def get_dependencies(self, namespaces): compute the transitive dependency graph themself, if they want it. Args: - namespaces(Dict[str, List[Rule]]): mapping from namespace name to rules in it. + namespaces(dict[str, list[Rule]]): mapping from namespace name to rules in it. see `index_rules_by_namespace`. Returns: - List[str]: names of rules upon which this rule depends. + list[str]: names of rules upon which this rule depends. """ - deps: Set[str] = set() + deps: set[str] = set() def rec(statement): if isinstance(statement, capa.features.common.MatchedRule): @@ -968,8 +968,8 @@ def extract_subscope_rules(self): yield from self._extract_subscope_rules_rec(self.statement) - def _extract_all_features_rec(self, statement) -> Set[Feature]: - feature_set: Set[Feature] = set() + def _extract_all_features_rec(self, statement) -> set[Feature]: + feature_set: set[Feature] = set() for child in statement.get_children(): if isinstance(child, Statement): @@ -978,7 +978,7 @@ def _extract_all_features_rec(self, statement) -> Set[Feature]: feature_set.add(child) return feature_set - def extract_all_features(self) -> Set[Feature]: + def extract_all_features(self) -> set[Feature]: """ recursively extracts all feature statements in this rule. @@ -1001,7 +1001,7 @@ def evaluate(self, features: FeatureSet, short_circuit=True): return self.statement.evaluate(features, short_circuit=short_circuit) @classmethod - def from_dict(cls, d: Dict[str, Any], definition: str) -> "Rule": + def from_dict(cls, d: dict[str, Any], definition: str) -> "Rule": meta = d["rule"]["meta"] name = meta["name"] @@ -1214,14 +1214,14 @@ def move_to_end(m, k): return doc -def get_rules_with_scope(rules, scope: Scope) -> List[Rule]: +def get_rules_with_scope(rules, scope: Scope) -> list[Rule]: """ from the given collection of rules, select those with the given scope. 
""" return [rule for rule in rules if scope in rule.scopes] -def get_rules_and_dependencies(rules: List[Rule], rule_name: str) -> Iterator[Rule]: +def get_rules_and_dependencies(rules: list[Rule], rule_name: str) -> Iterator[Rule]: """ from the given collection of rules, select a rule and its dependencies (transitively). """ @@ -1249,7 +1249,7 @@ def rec(rule: Rule): yield rule -def ensure_rules_are_unique(rules: List[Rule]) -> None: +def ensure_rules_are_unique(rules: list[Rule]) -> None: seen = set() for rule in rules: if rule.name in seen: @@ -1257,7 +1257,7 @@ def ensure_rules_are_unique(rules: List[Rule]) -> None: seen.add(rule.name) -def ensure_rule_dependencies_are_met(rules: List[Rule]) -> None: +def ensure_rule_dependencies_are_met(rules: list[Rule]) -> None: """ raise an exception if a rule dependency does not exist. @@ -1274,7 +1274,7 @@ def ensure_rule_dependencies_are_met(rules: List[Rule]) -> None: raise InvalidRule(f'rule "{rule.name}" depends on missing rule "{dep}"') -def index_rules_by_namespace(rules: List[Rule]) -> Dict[str, List[Rule]]: +def index_rules_by_namespace(rules: list[Rule]) -> dict[str, list[Rule]]: """ compute the rules that fit into each namespace found within the given rules. @@ -1303,7 +1303,7 @@ def index_rules_by_namespace(rules: List[Rule]) -> Dict[str, List[Rule]]: return dict(namespaces) -def topologically_order_rules(rules: List[Rule]) -> List[Rule]: +def topologically_order_rules(rules: list[Rule]) -> list[Rule]: """ order the given rules such that dependencies show up before dependents. this means that as we match rules, we can add features for the matches, and these @@ -1351,7 +1351,7 @@ class RuleSet: def __init__( self, - rules: List[Rule], + rules: list[Rule], ): super().__init__() @@ -1389,7 +1389,7 @@ def __init__( self.rules_by_scope = {scope: self._get_rules_for_scope(rules, scope) for scope in scopes} # these structures are unstable and may change before the next major release. 
- scores_by_rule: Dict[str, int] = {} + scores_by_rule: dict[str, int] = {} self._feature_indexes_by_scopes = { scope: self._index_rules_by_feature(scope, self.rules_by_scope[scope], scores_by_rule) for scope in scopes } @@ -1433,7 +1433,7 @@ def __contains__(self, rulename): # this routine is unstable and may change before the next major release. @staticmethod - def _score_feature(scores_by_rule: Dict[str, int], node: capa.features.common.Feature) -> int: + def _score_feature(scores_by_rule: dict[str, int], node: capa.features.common.Feature) -> int: """ Score the given feature by how "uncommon" we think it will be. Features that we expect to be very selective (ie. uniquely identify a rule and be required to match), @@ -1577,17 +1577,17 @@ def _score_feature(scores_by_rule: Dict[str, int], node: capa.features.common.Fe @dataclass class _RuleFeatureIndex: # Mapping from hashable feature to a list of rules that might have this feature. - rules_by_feature: Dict[Feature, Set[str]] + rules_by_feature: dict[Feature, set[str]] # Mapping from rule name to list of Regex/Substring features that have to match. # All these features will be evaluated whenever a String feature is encountered. - string_rules: Dict[str, List[Feature]] + string_rules: dict[str, list[Feature]] # Mapping from rule name to list of Bytes features that have to match. # All these features will be evaluated whenever a Bytes feature is encountered. - bytes_rules: Dict[str, List[Feature]] + bytes_rules: dict[str, list[Feature]] # this routine is unstable and may change before the next major release. @staticmethod - def _index_rules_by_feature(scope: Scope, rules: List[Rule], scores_by_rule: Dict[str, int]) -> _RuleFeatureIndex: + def _index_rules_by_feature(scope: Scope, rules: list[Rule], scores_by_rule: dict[str, int]) -> _RuleFeatureIndex: """ Index the given rules by their minimal set of most "uncommon" features required to match. 
@@ -1595,12 +1595,12 @@ def _index_rules_by_feature(scope: Scope, rules: List[Rule], scores_by_rule: Dic (which are not hashable and require a scan) that have to match, too. """ - rules_by_feature: Dict[Feature, Set[str]] = collections.defaultdict(set) + rules_by_feature: dict[Feature, set[str]] = collections.defaultdict(set) def rec( rule_name: str, node: Union[Feature, Statement], - ) -> Optional[Tuple[int, Set[Feature]]]: + ) -> Optional[tuple[int, set[Feature]]]: """ Walk through a rule's logic tree, picking the features to use for indexing, returning the feature and an associated score. @@ -1667,7 +1667,7 @@ def rec( # # In this case, we prefer to pick the pair of API features since each is expected # to be more common than the mnemonic. - scores: List[Tuple[int, Set[Feature]]] = [] + scores: list[tuple[int, set[Feature]]] = [] for child in node.children: score = rec(rule_name, child) @@ -1734,8 +1734,8 @@ def and_score_key(item): # These are the Regex/Substring/Bytes features that we have to use for filtering. # Ideally we find a way to get rid of all of these, eventually. 
- string_rules: Dict[str, List[Feature]] = {} - bytes_rules: Dict[str, List[Feature]] = {} + string_rules: dict[str, list[Feature]] = {} + bytes_rules: dict[str, list[Feature]] = {} for rule in rules: rule_name = rule.meta["name"] @@ -1765,10 +1765,10 @@ def and_score_key(item): logger.debug(" : [%d] %s", RuleSet._score_feature(scores_by_rule, feature), feature) if string_features: - string_rules[rule_name] = cast(List[Feature], string_features) + string_rules[rule_name] = cast(list[Feature], string_features) if bytes_features: - bytes_rules[rule_name] = cast(List[Feature], bytes_features) + bytes_rules[rule_name] = cast(list[Feature], bytes_features) for feature in hashable_features: rules_by_feature[feature].add(rule_name) @@ -1785,7 +1785,7 @@ def and_score_key(item): return RuleSet._RuleFeatureIndex(rules_by_feature, string_rules, bytes_rules) @staticmethod - def _get_rules_for_scope(rules, scope) -> List[Rule]: + def _get_rules_for_scope(rules, scope) -> list[Rule]: """ given a collection of rules, collect the rules that are needed at the given scope. these rules are ordered topologically. @@ -1793,7 +1793,7 @@ def _get_rules_for_scope(rules, scope) -> List[Rule]: don't include auto-generated "subscope" rules. we want to include general "lib" rules here - even if they are not dependencies of other rules, see #398 """ - scope_rules: Set[Rule] = set() + scope_rules: set[Rule] = set() # we need to process all rules, not just rules with the given scope. # this is because rules with a higher scope, e.g. file scope, may have subscope rules @@ -1807,7 +1807,7 @@ def _get_rules_for_scope(rules, scope) -> List[Rule]: return get_rules_with_scope(topologically_order_rules(list(scope_rules)), scope) @staticmethod - def _extract_subscope_rules(rules) -> List[Rule]: + def _extract_subscope_rules(rules) -> list[Rule]: """ process the given sequence of rules. for each one, extract any embedded subscope rules into their own rule. 
@@ -1854,16 +1854,16 @@ def filter_rules_by_meta(self, tag: str) -> "RuleSet": # this routine is unstable and may change before the next major release. @staticmethod - def _sort_rules_by_index(rule_index_by_rule_name: Dict[str, int], rules: List[Rule]): + def _sort_rules_by_index(rule_index_by_rule_name: dict[str, int], rules: list[Rule]): """ - Sort (in place) the given rules by their index provided by the given Dict. + Sort (in place) the given rules by their index provided by the given dict. This mapping is intended to represent the topologic index of the given rule; that is, rules with a lower index should be evaluated first, since their dependencies will be evaluated later. """ rules.sort(key=lambda r: rule_index_by_rule_name[r.name]) - def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[FeatureSet, ceng.MatchResults]: + def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> tuple[FeatureSet, ceng.MatchResults]: """ Match rules from this ruleset at the given scope against the given features. @@ -1872,7 +1872,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[Fea """ feature_index: RuleSet._RuleFeatureIndex = self._feature_indexes_by_scopes[scope] - rules: List[Rule] = self.rules_by_scope[scope] + rules: list[Rule] = self.rules_by_scope[scope] # Topologic location of rule given its name. # That is, rules with a lower index should be evaluated first, since their dependencies # will be evaluated later. @@ -1908,7 +1908,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[Fea # Find all the rules that could match the given feature set. # Ideally we want this set to be as small and focused as possible, # and we can tune it by tweaking `_index_rules_by_feature`. 
- candidate_rule_names: Set[str] = set() + candidate_rule_names: set[str] = set() for feature in features: candidate_rule_names.update(feature_index.rules_by_feature.get(feature, ())) @@ -2018,7 +2018,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[Fea new_features.append(capa.features.common.MatchedRule(namespace)) if new_features: - new_candidates: List[str] = [] + new_candidates: list[str] = [] for new_feature in new_features: new_candidates.extend(feature_index.rules_by_feature.get(new_feature, ())) @@ -2031,7 +2031,7 @@ def _match(self, scope: Scope, features: FeatureSet, addr: Address) -> Tuple[Fea def match( self, scope: Scope, features: FeatureSet, addr: Address, paranoid=False - ) -> Tuple[FeatureSet, ceng.MatchResults]: + ) -> tuple[FeatureSet, ceng.MatchResults]: """ Match rules from this ruleset at the given scope against the given features. @@ -2053,7 +2053,7 @@ def match( features, matches = self._match(scope, features, addr) if paranoid: - rules: List[Rule] = self.rules_by_scope[scope] + rules: list[Rule] = self.rules_by_scope[scope] paranoid_features, paranoid_matches = capa.engine.match(rules, features, addr) if features != paranoid_features: @@ -2086,7 +2086,7 @@ def is_nursery_rule_path(path: Path) -> bool: return "nursery" in path.parts -def collect_rule_file_paths(rule_paths: List[Path]) -> List[Path]: +def collect_rule_file_paths(rule_paths: list[Path]) -> list[Path]: """ collect all rule file paths, including those in subdirectories. 
""" @@ -2127,7 +2127,7 @@ def on_load_rule_default(_path: RulePath, i: int, _total: int) -> None: def get_rules( - rule_paths: List[RulePath], + rule_paths: list[RulePath], cache_dir=None, on_load_rule: Callable[[RulePath, int, int], None] = on_load_rule_default, enable_cache: bool = True, @@ -2154,7 +2154,7 @@ def get_rules( if ruleset is not None: return ruleset - rules: List[Rule] = [] + rules: list[Rule] = [] total_rule_count = len(rule_file_paths) for i, (path, content) in enumerate(zip(rule_file_paths, rule_contents)): diff --git a/capa/rules/cache.py b/capa/rules/cache.py index 6f87570ef..57537e959 100644 --- a/capa/rules/cache.py +++ b/capa/rules/cache.py @@ -11,7 +11,7 @@ import pickle import hashlib import logging -from typing import List, Optional +from typing import Optional from pathlib import Path from dataclasses import dataclass @@ -26,7 +26,7 @@ CacheIdentifier = str -def compute_cache_identifier(rule_content: List[bytes]) -> CacheIdentifier: +def compute_cache_identifier(rule_content: list[bytes]) -> CacheIdentifier: hash = hashlib.sha256() # note that this changes with each release, @@ -96,7 +96,7 @@ def load(data): return cache -def get_ruleset_content(ruleset: capa.rules.RuleSet) -> List[bytes]: +def get_ruleset_content(ruleset: capa.rules.RuleSet) -> list[bytes]: rule_contents = [] for rule in ruleset.rules.values(): if rule.is_subscope_rule(): @@ -132,7 +132,7 @@ def cache_ruleset(cache_dir: Path, ruleset: capa.rules.RuleSet): return -def load_cached_ruleset(cache_dir: Path, rule_contents: List[bytes]) -> Optional[capa.rules.RuleSet]: +def load_cached_ruleset(cache_dir: Path, rule_contents: list[bytes]) -> Optional[capa.rules.RuleSet]: """ load a cached ruleset from disk, using the given cache directory. 
the raw rule contents are required here to prove that the rules haven't changed diff --git a/pyproject.toml b/pyproject.toml index 3416c3a9b..f8e9c3bc6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ authors = [ description = "The FLARE team's open-source tool to identify capabilities in executable files." readme = {file = "README.md", content-type = "text/markdown"} license = {file = "LICENSE.txt"} -requires-python = ">=3.8.1" +requires-python = ">=3.10" keywords = ["malware analysis", "reverse engineering", "capability detection", "software behaviors", "capa", "FLARE"] classifiers = [ "Development Status :: 5 - Production/Stable", @@ -95,9 +95,7 @@ dependencies = [ # typically due to dropping support for python releases # we still support. - # TODO(williballenthin): networkx 3.2 doesn't support python 3.8 while capa does. - # https://github.com/mandiant/capa/issues/1966 - "networkx>=3,<3.2", + "networkx>=3", "dnfile>=0.15.0", ] @@ -235,7 +233,7 @@ DEP002 = [ # dependencies imported but missing from definitions DEP003 = [ - "typing_extensions" # TODO(s-ff): remove when Python 3.9 is deprecated, see #1699 + "typing_extensions" # TODO(s-ff): remove when Python 3.10 is deprecated, see #1699 ] [tool.deptry.package_module_name_map] diff --git a/requirements.txt b/requirements.txt index 7d7f10cca..7e6354282 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ intervaltree==3.1.0 markdown-it-py==3.0.0 mdurl==0.1.2 msgpack==1.0.8 -networkx==3.1 +networkx==3.4.2 pefile==2024.8.26 pip==24.2 protobuf==5.28.2 diff --git a/scripts/capa-as-library.py b/scripts/capa-as-library.py index bb1c2102a..0555a0263 100644 --- a/scripts/capa-as-library.py +++ b/scripts/capa-as-library.py @@ -9,7 +9,7 @@ import json import collections -from typing import Any, Set, Dict +from typing import Any from pathlib import Path import capa.main @@ -34,7 +34,7 @@ def render_meta(doc: rd.ResultDocument, result): result["path"] = doc.meta.sample.path -def 
find_subrule_matches(doc: rd.ResultDocument) -> Set[str]: +def find_subrule_matches(doc: rd.ResultDocument) -> set[str]: """ collect the rule names that have been matched as a subrule match. this way we can avoid displaying entries for things that are too specific. @@ -158,8 +158,8 @@ def render_mbc(doc, result): result["MBC"].setdefault(objective.upper(), inner_rows) -def render_dictionary(doc: rd.ResultDocument) -> Dict[str, Any]: - result: Dict[str, Any] = {} +def render_dictionary(doc: rd.ResultDocument) -> dict[str, Any]: + result: dict[str, Any] = {} render_meta(doc, result) render_attack(doc, result) render_mbc(doc, result) diff --git a/scripts/capa2sarif.py b/scripts/capa2sarif.py index 79330479d..5d663b977 100644 --- a/scripts/capa2sarif.py +++ b/scripts/capa2sarif.py @@ -25,7 +25,7 @@ import json import logging import argparse -from typing import List, Optional +from typing import Optional from pathlib import Path from capa.version import __version__ @@ -241,7 +241,7 @@ def _populate_invocations(sarif_log: dict, meta_data: dict) -> None: sarif_log["runs"][0]["invocations"].append(invoke) -def _enumerate_evidence(node: dict, related_count: int) -> List[dict]: +def _enumerate_evidence(node: dict, related_count: int) -> list[dict]: related_locations = [] if node.get("success") and node.get("node", {}).get("type") != "statement": label = "" diff --git a/scripts/compare-backends.py b/scripts/compare-backends.py index 1c000bade..fa4ddb010 100644 --- a/scripts/compare-backends.py +++ b/scripts/compare-backends.py @@ -15,7 +15,7 @@ import statistics import subprocess import multiprocessing -from typing import Set, Dict, List, Optional +from typing import Optional from pathlib import Path from collections import Counter from dataclasses import dataclass @@ -183,8 +183,8 @@ def report(args): for backend in BACKENDS: samples.update(doc[backend].keys()) - failures_by_backend: Dict[str, Set[str]] = {backend: set() for backend in BACKENDS} - durations_by_backend: 
Dict[str, List[float]] = {backend: [] for backend in BACKENDS} + failures_by_backend: dict[str, set[str]] = {backend: set() for backend in BACKENDS} + durations_by_backend: dict[str, list[float]] = {backend: [] for backend in BACKENDS} console = rich.get_console() for key in sorted(samples): @@ -193,7 +193,7 @@ def report(args): seen_rules: Counter[str] = Counter() - rules_by_backend: Dict[str, Set[str]] = {backend: set() for backend in BACKENDS} + rules_by_backend: dict[str, set[str]] = {backend: set() for backend in BACKENDS} for backend in BACKENDS: if key not in doc[backend]: diff --git a/scripts/detect_duplicate_features.py b/scripts/detect_duplicate_features.py index 9561339c2..c904a1a05 100644 --- a/scripts/detect_duplicate_features.py +++ b/scripts/detect_duplicate_features.py @@ -8,7 +8,6 @@ import sys import logging import argparse -from typing import Set from pathlib import Path import capa.main @@ -18,7 +17,7 @@ logger = logging.getLogger("detect_duplicate_features") -def get_features(rule_path: str) -> Set[Feature]: +def get_features(rule_path: str) -> set[Feature]: """ Extracts all features from a given rule file. 
diff --git a/scripts/inspect-binexport2.py b/scripts/inspect-binexport2.py index de2c82d86..07fc79eca 100644 --- a/scripts/inspect-binexport2.py +++ b/scripts/inspect-binexport2.py @@ -14,7 +14,7 @@ import logging import argparse import contextlib -from typing import Dict, List, Optional +from typing import Optional import capa.main import capa.features.extractors.binexport2 @@ -71,14 +71,14 @@ def getvalue(self): def _render_expression_tree( be2: BinExport2, operand: BinExport2.Operand, - expression_tree: List[List[int]], + expression_tree: list[list[int]], tree_index: int, o: io.StringIO, ): expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] if expression.type == BinExport2.Expression.REGISTER: o.write(expression.symbol) @@ -177,7 +177,7 @@ def _render_expression_tree( raise NotImplementedError(expression.type) -_OPERAND_CACHE: Dict[int, str] = {} +_OPERAND_CACHE: dict[int, str] = {} def render_operand(be2: BinExport2, operand: BinExport2.Operand, index: Optional[int] = None) -> str: @@ -223,7 +223,7 @@ def inspect_operand(be2: BinExport2, operand: BinExport2.Operand): def rec(tree_index, indent=0): expression_index = operand.expression_index[tree_index] expression = be2.expression[expression_index] - children_tree_indexes: List[int] = expression_tree[tree_index] + children_tree_indexes: list[int] = expression_tree[tree_index] NEWLINE = "\n" print(f" {' ' * indent}expression: {str(expression).replace(NEWLINE, ', ')}") @@ -435,7 +435,7 @@ def main(argv=None): # appears to be code continue - data_xrefs: List[int] = [] + data_xrefs: list[int] = [] for data_reference_index in idx.data_reference_index_by_target_address[data_address]: data_reference = be2.data_reference[data_reference_index] instruction_address = idx.get_insn_address(data_reference.instruction_index) diff --git 
a/scripts/lint.py b/scripts/lint.py index e96604e64..0d6ebfa93 100644 --- a/scripts/lint.py +++ b/scripts/lint.py @@ -27,7 +27,6 @@ import argparse import itertools import posixpath -from typing import Set, Dict, List from pathlib import Path from dataclasses import field, dataclass @@ -59,10 +58,10 @@ class Context: capabilities_by_sample: cache of results, indexed by file path. """ - samples: Dict[str, Path] + samples: dict[str, Path] rules: RuleSet is_thorough: bool - capabilities_by_sample: Dict[Path, Set[str]] = field(default_factory=dict) + capabilities_by_sample: dict[Path, set[str]] = field(default_factory=dict) class Lint: @@ -330,7 +329,7 @@ def check_rule(self, ctx: Context, rule: Rule): DEFAULT_SIGNATURES = capa.main.get_default_signatures() -def get_sample_capabilities(ctx: Context, path: Path) -> Set[str]: +def get_sample_capabilities(ctx: Context, path: Path) -> set[str]: nice_path = path.resolve().absolute() if path in ctx.capabilities_by_sample: logger.debug("found cached results: %s: %d capabilities", nice_path, len(ctx.capabilities_by_sample[path])) @@ -541,7 +540,7 @@ class FeatureStringTooShort(Lint): name = "feature string too short" recommendation = 'capa only extracts strings with length >= 4; will not match on "{:s}"' - def check_features(self, ctx: Context, features: List[Feature]): + def check_features(self, ctx: Context, features: list[Feature]): for feature in features: if isinstance(feature, (String, Substring)): assert isinstance(feature.value, str) @@ -559,7 +558,7 @@ class FeatureNegativeNumber(Lint): + 'representation; will not match on "{:d}"' ) - def check_features(self, ctx: Context, features: List[Feature]): + def check_features(self, ctx: Context, features: list[Feature]): for feature in features: if isinstance(feature, (capa.features.insn.Number,)): assert isinstance(feature.value, int) @@ -577,7 +576,7 @@ class FeatureNtdllNtoskrnlApi(Lint): + "module requirement to improve detection" ) - def check_features(self, ctx: 
Context, features: List[Feature]): + def check_features(self, ctx: Context, features: list[Feature]): for feature in features: if isinstance(feature, capa.features.insn.API): assert isinstance(feature.value, str) @@ -712,7 +711,7 @@ def run_lints(lints, ctx: Context, rule: Rule): yield lint -def run_feature_lints(lints, ctx: Context, features: List[Feature]): +def run_feature_lints(lints, ctx: Context, features: list[Feature]): for lint in lints: if lint.check_features(ctx, features): yield lint @@ -900,7 +899,7 @@ def width(s, count): def lint(ctx: Context): """ - Returns: Dict[string, Tuple(int, int)] + Returns: dict[string, tuple(int, int)] - # lints failed - # lints warned """ @@ -920,7 +919,7 @@ def lint(ctx: Context): return ret -def collect_samples(samples_path: Path) -> Dict[str, Path]: +def collect_samples(samples_path: Path) -> dict[str, Path]: """ recurse through the given path, collecting all file paths, indexed by their content sha256, md5, and filename. """ diff --git a/scripts/setup-linter-dependencies.py b/scripts/setup-linter-dependencies.py index b4ae3fd1f..4c2b321eb 100644 --- a/scripts/setup-linter-dependencies.py +++ b/scripts/setup-linter-dependencies.py @@ -43,7 +43,6 @@ import logging import argparse from sys import argv -from typing import Dict, List from pathlib import Path import requests @@ -77,7 +76,7 @@ def __init__(self): self._memory_store = MemoryStore(stix_data=stix_json["objects"]) @staticmethod - def _remove_deprecated_objects(stix_objects) -> List[AttackPattern]: + def _remove_deprecated_objects(stix_objects) -> list[AttackPattern]: """Remove any revoked or deprecated objects from queries made to the data source.""" return list( filter( @@ -86,7 +85,7 @@ def _remove_deprecated_objects(stix_objects) -> List[AttackPattern]: ) ) - def _get_tactics(self) -> List[Dict]: + def _get_tactics(self) -> list[dict]: """Get tactics IDs from Mitre matrix.""" # Only one matrix for enterprise att&ck framework matrix = 
self._remove_deprecated_objects( @@ -98,7 +97,7 @@ def _get_tactics(self) -> List[Dict]: )[0] return list(map(self._memory_store.get, matrix["tactic_refs"])) - def _get_techniques_from_tactic(self, tactic: str) -> List[AttackPattern]: + def _get_techniques_from_tactic(self, tactic: str) -> list[AttackPattern]: """Get techniques and sub techniques from a Mitre tactic (kill_chain_phases->phase_name)""" techniques = self._remove_deprecated_objects( self._memory_store.query( @@ -124,12 +123,12 @@ def _get_parent_technique_from_subtechnique(self, technique: AttackPattern) -> A )[0] return parent_technique - def run(self) -> Dict[str, Dict[str, str]]: + def run(self) -> dict[str, dict[str, str]]: """Iterate over every technique over every tactic. If the technique is a sub technique, then we also search for the parent technique name. """ logging.info("Starting extraction...") - data: Dict[str, Dict[str, str]] = {} + data: dict[str, dict[str, str]] = {} for tactic in self._get_tactics(): data[tactic["name"]] = {} for technique in sorted( @@ -159,7 +158,7 @@ class MbcExtractor(MitreExtractor): url = "https://raw.githubusercontent.com/MBCProject/mbc-stix2/master/mbc/mbc.json" kill_chain_name = "mitre-mbc" - def _get_tactics(self) -> List[Dict]: + def _get_tactics(self) -> list[dict]: """Override _get_tactics to edit the tactic name for Micro-objective""" tactics = super()._get_tactics() # We don't want the Micro-objective string inside objective names diff --git a/scripts/show-capabilities-by-function.py b/scripts/show-capabilities-by-function.py index 6c8876581..e0e8fabc3 100644 --- a/scripts/show-capabilities-by-function.py +++ b/scripts/show-capabilities-by-function.py @@ -59,7 +59,6 @@ import logging import argparse import collections -from typing import Dict import colorama @@ -99,7 +98,7 @@ def render_matches_by_function(doc: rd.ResultDocument): - connect to HTTP server """ assert isinstance(doc.meta.analysis, rd.StaticAnalysis) - functions_by_bb: Dict[Address, 
Address] = {} + functions_by_bb: dict[Address, Address] = {} for finfo in doc.meta.analysis.layout.functions: faddress = finfo.address diff --git a/scripts/show-features.py b/scripts/show-features.py index 6005a810c..bf358aa21 100644 --- a/scripts/show-features.py +++ b/scripts/show-features.py @@ -67,7 +67,6 @@ import sys import logging import argparse -from typing import Tuple import capa.main import capa.rules @@ -136,7 +135,7 @@ def print_static_analysis(extractor: StaticFeatureExtractor, args): for feature, addr in extractor.extract_file_features(): print(f"file: {format_address(addr)}: {feature}") - function_handles: Tuple[FunctionHandle, ...] + function_handles: tuple[FunctionHandle, ...] if isinstance(extractor, capa.features.extractors.pefile.PefileFeatureExtractor): # pefile extractor doesn't extract function features function_handles = () diff --git a/scripts/show-unused-features.py b/scripts/show-unused-features.py index be850e927..55fbdad04 100644 --- a/scripts/show-unused-features.py +++ b/scripts/show-unused-features.py @@ -9,10 +9,8 @@ See the License for the specific language governing permissions and limitations under the License. 
""" import sys -import typing import logging import argparse -from typing import Set, List, Tuple from collections import Counter from rich import print @@ -40,8 +38,8 @@ def format_address(addr: capa.features.address.Address) -> str: return v.format_address(capa.features.freeze.Address.from_capa((addr))) -def get_rules_feature_set(rules: capa.rules.RuleSet) -> Set[Feature]: - rules_feature_set: Set[Feature] = set() +def get_rules_feature_set(rules: capa.rules.RuleSet) -> set[Feature]: + rules_feature_set: set[Feature] = set() for _, rule in rules.rules.items(): rules_feature_set.update(rule.extract_all_features()) @@ -49,9 +47,9 @@ def get_rules_feature_set(rules: capa.rules.RuleSet) -> Set[Feature]: def get_file_features( - functions: Tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.StaticFeatureExtractor -) -> typing.Counter[Feature]: - feature_map: typing.Counter[Feature] = Counter() + functions: tuple[FunctionHandle, ...], extractor: capa.features.extractors.base_extractor.StaticFeatureExtractor +) -> Counter[Feature]: + feature_map: Counter[Feature] = Counter() for f in functions: if extractor.is_library_function(f.address): @@ -86,8 +84,8 @@ def get_colored(s: str) -> Text: return Text(s, style="cyan") -def print_unused_features(feature_map: typing.Counter[Feature], rules_feature_set: Set[Feature]): - unused_features: List[Tuple[str, Text]] = [] +def print_unused_features(feature_map: Counter[Feature], rules_feature_set: set[Feature]): + unused_features: list[tuple[str, Text]] = [] for feature, count in reversed(feature_map.most_common()): if feature in rules_feature_set: continue @@ -130,11 +128,11 @@ def main(argv=None): assert isinstance(extractor, StaticFeatureExtractor), "only static analysis supported today" - feature_map: typing.Counter[Feature] = Counter() + feature_map: Counter[Feature] = Counter() feature_map.update([feature for feature, _ in extractor.extract_global_features()]) - function_handles: 
Tuple[FunctionHandle, ...] + function_handles: tuple[FunctionHandle, ...] if isinstance(extractor, capa.features.extractors.pefile.PefileFeatureExtractor): # pefile extractor doesn't extract function features function_handles = () @@ -173,7 +171,7 @@ def ida_main(): print(f"getting features for current function {hex(function)}") extractor = capa.features.extractors.ida.extractor.IdaFeatureExtractor() - feature_map: typing.Counter[Feature] = Counter() + feature_map: Counter[Feature] = Counter() feature_map.update([feature for feature, _ in extractor.extract_file_features()]) diff --git a/tests/fixtures.py b/tests/fixtures.py index 1912a456a..19285eca4 100644 --- a/tests/fixtures.py +++ b/tests/fixtures.py @@ -9,7 +9,6 @@ import binascii import contextlib import collections -from typing import Set, Dict from pathlib import Path from functools import lru_cache @@ -310,7 +309,7 @@ def extract_basic_block_features(extractor, fh, bbh): # f may not be hashable (e.g. ida func_t) so cannot @lru_cache this -def extract_instruction_features(extractor, fh, bbh, ih) -> Dict[Feature, Set[Address]]: +def extract_instruction_features(extractor, fh, bbh, ih) -> dict[Feature, set[Address]]: features = collections.defaultdict(set) for feature, addr in extractor.extract_insn_features(fh, bbh, ih): features[feature].add(addr) diff --git a/tests/test_binexport_accessors.py b/tests/test_binexport_accessors.py index bc9ea6db1..097af8b4f 100644 --- a/tests/test_binexport_accessors.py +++ b/tests/test_binexport_accessors.py @@ -8,7 +8,7 @@ import re import logging -from typing import Any, Dict +from typing import Any from pathlib import Path import pytest @@ -297,7 +297,7 @@ def test_get_operand_immediate_expression(addr, expressions): bl 0x100 add x0, sp, 0x10 """ -BE2_DICT: Dict[str, Any] = { +BE2_DICT: dict[str, Any] = { "expression": [ {"type": BinExport2.Expression.REGISTER, "symbol": "x0"}, {"type": BinExport2.Expression.IMMEDIATE_INT, "immediate": 0x20}, diff --git 
a/tests/test_freeze_dynamic.py b/tests/test_freeze_dynamic.py index b3087c092..ead4d50c2 100644 --- a/tests/test_freeze_dynamic.py +++ b/tests/test_freeze_dynamic.py @@ -6,7 +6,6 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import textwrap -from typing import List from pathlib import Path import fixtures @@ -85,7 +84,7 @@ ) -def addresses(s) -> List[Address]: +def addresses(s) -> list[Address]: return sorted(i.address for i in s) diff --git a/tests/test_freeze_static.py b/tests/test_freeze_static.py index 4674afc89..bd0c90b5d 100644 --- a/tests/test_freeze_static.py +++ b/tests/test_freeze_static.py @@ -6,7 +6,6 @@ # is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and limitations under the License. import textwrap -from typing import List from pathlib import Path import pytest @@ -65,7 +64,7 @@ ) -def addresses(s) -> List[Address]: +def addresses(s) -> list[Address]: return sorted(i.address for i in s) diff --git a/web/rules/scripts/build_root.py b/web/rules/scripts/build_root.py index aefd8d882..fbb0947a7 100644 --- a/web/rules/scripts/build_root.py +++ b/web/rules/scripts/build_root.py @@ -11,7 +11,6 @@ import sys import random import logging -from typing import Dict, List from pathlib import Path import capa.rules @@ -49,7 +48,7 @@ def read_file_paths(txt_file_path: Path): - categorized_files: Dict[str, List[Path]] = { + categorized_files: dict[str, list[Path]] = { "modified in the last day": [], "modified in the last week": [], "modified in the last month": [],