Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Updates capa_explorer.py, enabling the user to choose between bookmarks and comments. #2029

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 75 additions & 16 deletions capa/ghidra/capa_explorer.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,10 @@ def create_label(ghidra_addr, name, capa_namespace):
# prevent duplicate labels under the same capa-generated namespace
symbol_table = currentProgram().getSymbolTable() # type: ignore [name-defined] # noqa: F821
for sym in symbol_table.getSymbols(ghidra_addr):
if sym.getName(True) == capa_namespace.getName(True) + Namespace.DELIMITER + name:
if (
sym.getName(True)
== capa_namespace.getName(True) + Namespace.DELIMITER + name
):
return

# create SymbolType.LABEL at addr
Expand Down Expand Up @@ -98,7 +101,9 @@ def bookmark_functions(self):
for part in item.get("parts", {}):
attack_txt = attack_txt + part + Namespace.DELIMITER
attack_txt = attack_txt + item.get("id", {})
add_bookmark(func_addr, attack_txt, "CapaExplorer::MITRE ATT&CK")
add_bookmark(
func_addr, attack_txt, "CapaExplorer::MITRE ATT&CK"
)

if self.mbc != []:
for item in self.mbc:
Expand Down Expand Up @@ -127,11 +132,28 @@ def set_pre_comment(self, ghidra_addr, sub_type, description):
"""set pre comments at subscoped matches of main rules"""
comment = getPreComment(ghidra_addr) # type: ignore [name-defined] # noqa: F821
if comment is None:
comment = "capa: " + sub_type + "(" + description + ")" + ' matched in "' + self.capability + '"\n'
comment = (
"capa: "
+ sub_type
+ "("
+ description
+ ")"
+ ' matched in "'
+ self.capability
+ '"\n'
)
setPreComment(ghidra_addr, comment) # type: ignore [name-defined] # noqa: F821
elif self.capability not in comment:
comment = (
comment + "capa: " + sub_type + "(" + description + ")" + ' matched in "' + self.capability + '"\n'
comment
+ "capa: "
+ sub_type
+ "("
+ description
+ ")"
+ ' matched in "'
+ self.capability
+ '"\n'
)
setPreComment(ghidra_addr, comment) # type: ignore [name-defined] # noqa: F821
else:
Expand Down Expand Up @@ -167,7 +189,9 @@ def label_matches(self):
# precomment subscope matches under the function
if node != {}:
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
self.set_pre_comment(
sub_ghidra_addr, sub_type, description
)
else:
# resolve the encompassing function for the capa namespace
# of non-function scoped main matches
Expand All @@ -191,7 +215,9 @@ def label_matches(self):
if func is not None:
# basic block/ insn scope under resolved function
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
self.set_pre_comment(
sub_ghidra_addr, sub_type, description
)
else:
# this would be a global/file scoped main match
# try to resolve the encompassing function via the subscope match, instead
Expand All @@ -200,21 +226,31 @@ def label_matches(self):
if sub_func is not None:
sub_func_addr = sub_func.getEntryPoint()
# place function in capa namespace & create the subscope match label in Ghidra's global namespace
create_label(sub_func_addr, sub_func.getName(), capa_namespace)
create_label(
sub_func_addr,
sub_func.getName(),
capa_namespace,
)
Atlas-64 marked this conversation as resolved.
Show resolved Hide resolved
self.set_plate_comment(sub_func_addr)
for sub_type, description in parse_node(node):
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
self.set_pre_comment(
sub_ghidra_addr, sub_type, description
)
else:
# addr is in some other file section like .data
# represent this location with a label symbol under the capa namespace
# Ex. See "Reference Base64 String" rule
for sub_type, description in parse_node(node):
# in many cases, these will be ghidra-labeled data, so just add the existing
# label symbol to the capa namespace
for sym in symbol_table.getSymbols(sub_ghidra_addr):
for sym in symbol_table.getSymbols(
sub_ghidra_addr
):
if sym.getSymbolType() == SymbolType.LABEL:
sym.setNamespace(capa_namespace)
self.set_pre_comment(sub_ghidra_addr, sub_type, description)
self.set_pre_comment(
sub_ghidra_addr, sub_type, description
)


def get_capabilities():
Expand All @@ -238,9 +274,13 @@ def get_capabilities():
meta = capa.ghidra.helpers.collect_metadata([rules_path])
extractor = capa.features.extractors.ghidra.extractor.GhidraFeatureExtractor()

capabilities, counts = capa.capabilities.common.find_capabilities(rules, extractor, True)
capabilities, counts = capa.capabilities.common.find_capabilities(
rules, extractor, True
)

if capa.capabilities.common.has_file_limitation(rules, capabilities, is_standalone=False):
if capa.capabilities.common.has_file_limitation(
rules, capabilities, is_standalone=False
):
popup("capa explorer encountered warnings during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821
logger.info("capa encountered warnings during analysis")

Expand Down Expand Up @@ -359,9 +399,26 @@ def main():
popup("capa explorer found no matches.") # type: ignore [name-defined] # noqa: F821
return capa.main.E_EMPTY_REPORT

for item in parse_json(capa_data):
item.bookmark_functions()
item.label_matches()
user_choice = askChoice( # type: ignore [name-defined] # noqa: F821
Atlas-64 marked this conversation as resolved.
Show resolved Hide resolved
"Choose b/w bookmarks & comments",
Atlas-64 marked this conversation as resolved.
Show resolved Hide resolved
"preferred action:",
Atlas-64 marked this conversation as resolved.
Show resolved Hide resolved
["bookmarks", "comments", "both", "none"],
Atlas-64 marked this conversation as resolved.
Show resolved Hide resolved
"both",
)

if user_choice == "bookmarks":
for item in parse_json(capa_data):
item.bookmark_functions()
elif user_choice == "comments":
for item in parse_json(capa_data):
item.label_matches()
elif user_choice == "both":
for item in parse_json(capa_data):
item.bookmark_functions()
item.label_matches()
else:
pass
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This script modifies a Ghidra database by adding:

  1. new namespace named "capa" and corresponding namespace entries
  2. pre/plate-comments
  3. function bookmarks

We want our changes here to enable users to select any number of these options when running this script. Presently, the label_matches method creates the "capa" namespace, corresponding namespace entries, and pre/plate-comments. We need to modify the label_matches method to account for the user's selection and I'd recommend passing new boolean arguments to the label_matches method to implement this. Creating a new "capa" namespace and corresponding namespace entries should be grouped as one option, likewise with setting pre/plate-comments.

Copy link
Contributor Author

@Atlas-64 Atlas-64 Mar 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@mike-hunhoff

So, just to clarify how comments are added based on the user's input: we want a boolean parameter with a default value, and we pass an argument reflecting what the user wants in the function call within the if-else statement.

And, if I understand correctly, you'd like separate options for creating the 'capa' namespace and setting pre/plate comments, even though the namespace might be used for comments. Is that correct?

Can you elaborate on the reasoning behind separate options? Is it for user flexibility or maybe workflow reasons?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Taking a step back, it may be easier to understand the requested changes by reviewing the README's UI integration section.

capa_explorer.py does three things:

  1. Add Ghidra top-level namespace named "capa" that is viewable in Ghidra's Symbol Tree window
  2. Add pre/plate comments that are viewable in Ghidra's Disassembly Listing and Decompiler windows
  3. Add bookmarks that are viewable in Ghidra's Bookmarks window

We'd like to give users an option to choose which of these three things are executed based on their needs/workflow. For example, a user may be interested in options 1 and 3 while not wanting hundreds of comments added to their Ghidra database by option 2.


logger.info("capa explorer analysis complete")
popup("capa explorer analysis complete.\nPlease see results in the Bookmarks Window and Namespaces section of the Symbol Tree Window.") # type: ignore [name-defined] # noqa: F821
return 0
Expand All @@ -371,7 +428,9 @@ def main():
if sys.version_info < (3, 8):
from capa.exceptions import UnsupportedRuntimeError

raise UnsupportedRuntimeError("This version of capa can only be used with Python 3.8+")
raise UnsupportedRuntimeError(
"This version of capa can only be used with Python 3.8+"
)
exit_code = main()
if exit_code != 0:
popup("capa explorer encountered errors during analysis. Please check the console output for more information.") # type: ignore [name-defined] # noqa: F821
Expand Down
Loading