-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathghidralib.py
4289 lines (3359 loc) · 151 KB
/
ghidralib.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
This library is an attempt to provide a Pythonic standard library for Ghidra.
The main goal is to make writing quick&dirty scripts actually quick, and not that dirty.
There is no equivalent of FlatProgramAPI from Ghidra. You are expected to start
by getting an object of interest by calling instance methods, for example
>>> Function("main")
main
to get a function called "main". When you want to do something this library
doesn't support (yet), you can always escape back to Ghidra's wrapped Java
types, by getting a `.raw` property, for example:
>>> Function("main").raw.UNKNOWN_STACK_DEPTH_CHANGE
2147483647
For more details, see the documentation at https://msm-code.github.io/ghidralib/.
"""
from abc import abstractmethod
from ghidra.app.decompiler import (
ClangSyntaxToken,
ClangCommentToken,
ClangBreak,
ClangTokenGroup as GhClangTokenGroup,
DecompInterface,
)
from ghidra.app.services import DataTypeManagerService, GraphDisplayBroker
from ghidra.app.util import PseudoDisassembler
from ghidra.app.util.cparser.C import CParser
from ghidra.app.emulator import EmulatorHelper
from ghidra.app.plugin.core.colorizer import ColorizingService
from ghidra.app.plugin.assembler import Assemblers
from ghidra.app.plugin.core.analysis import ConstantPropagationContextEvaluator
from ghidra.app.cmd.function import CreateFunctionCmd
from ghidra.app.util import SearchConstants
from ghidra.util.task import TaskMonitor
from ghidra.program.model.symbol import SourceType, RefType as GhRefType
from ghidra.program.model.pcode import (
HighFunctionDBUtil,
Varnode as GhVarnode,
BlockGraph as GhBlockGraph,
BlockCopy,
HighFunction as GhHighFunction,
JumpTable,
)
from ghidra.program.model.lang import Register as GhRegister
from ghidra.program.model.block import BasicBlockModel, SimpleBlockModel
from ghidra.program.model.address import (
GenericAddress,
AddressSet as GhAddressSet,
AddressSpace,
)
from ghidra.program.model.listing import ParameterImpl, Function as GhFunction
from ghidra.program.util import SymbolicPropogator as GhSymbolicPropogator
from ghidra.service.graph import GraphDisplayOptions, AttributedGraph, GraphType
from java.awt import Color
from java.util import ArrayList
from java.math import BigInteger
import sys
# Version string of this library.
__version__ = "0.2.0"
try:
    # Import types for static type hints (and catch import error in Jython)
    from typing import Any, Callable, TYPE_CHECKING, Iterator, TypeVar, Generic
except ImportError:
    # `typing` is unavailable: define TYPE_CHECKING ourselves as False so the
    # `if TYPE_CHECKING:` sections further down are skipped at runtime.
    TYPE_CHECKING = False
# Interpreter-specific setup. Both branches bind the same module-level names
# (the Ghidra helper functions plus the private _is_array / _bytes_as_list /
# _bytes_from_bytelist / _asbytes / _unhex / _enhex helpers), so the rest of
# the file can use them without caring which interpreter is running.
if sys.version_info.major == 2:
    # Jython support: this section is Jython specific
    # The Ghidra helper functions are taken from the __main__ module here.
    # NOTE(review): getMonitor is listed twice below; the repeat is redundant.
    from __main__ import (
        toAddr,
        createFunction,
        getDataAt,
        createLabel,
        getMonitor,
        createData,
        clearListing,
        getReferencesTo,
        getInstructionAt,
        getBytes,
        getState,
        getMonitor,
        removeSymbol,
        getCurrentProgram,
        disassemble,
        analyzeChanges,
        setBytes,
    )

    # Python2 specific type definitions
    # The goal is to support both languages with a single codebase
    if not TYPE_CHECKING:
        # Alias `bytes` to `str` on this branch.
        # NOTE(review): the original remark here read 'there is no "bytes" in
        # python3', which presumably meant Python 2 - confirm.
        bytes = str

    from array import array

    def _is_array(value):  # type: (object) -> bool
        """Return True if the given value is a Java array"""
        # On this branch Java arrays are recognised as `array.array` instances.
        return isinstance(value, array)

    def _bytes_as_list(value):  # type: (bytes) -> list[int]
        """Convert the given string to a list of bytes."""
        return [ord(c) for c in value]  # type: ignore

    def _bytes_from_bytelist(bytelist):  # type: (object) -> bytes
        """Convert the given Java array of bytes to python bytes"""
        # `x % 256` maps every element into 0..255 before chr();
        # presumably needed because Java bytes are signed - TODO confirm.
        return "".join(chr(x % 256) for x in bytelist)  # type: ignore

    def _asbytes(value):  # type: (str) -> bytes
        """Convert the given string to bytes. No-op in py2"""
        return value  # type: ignore

    def _unhex(s):  # type: (str) -> bytes
        """Decode hexadecimal string to bytes. Internal helper."""
        # Spaces and newlines are stripped before decoding.
        return s.replace(" ", "").replace("\n", "").decode("hex")  # type: ignore

    def _enhex(s):  # type: (bytes) -> str
        """Encode bytes to hexadecimal string. Internal helper."""
        return s.encode("hex")  # type: ignore

else:
    # PyGhidra support: this section is PyGhidra specific
    from pyghidra.script import get_current_interpreter

    # The same helper names as in the Jython branch, bound to methods of the
    # object returned by get_current_interpreter().
    interpreter = get_current_interpreter()
    toAddr = get_current_interpreter().toAddr
    createFunction = get_current_interpreter().createFunction
    getDataAt = get_current_interpreter().getDataAt
    createLabel = get_current_interpreter().createLabel
    getState = get_current_interpreter().getState
    createData = get_current_interpreter().createData
    clearListing = get_current_interpreter().clearListing
    getReferencesTo = get_current_interpreter().getReferencesTo
    getInstructionAt = get_current_interpreter().getInstructionAt
    getBytes = get_current_interpreter().getBytes
    getMonitor = get_current_interpreter().getMonitor
    removeSymbol = get_current_interpreter().removeSymbol
    getCurrentProgram = get_current_interpreter().getCurrentProgram
    disassemble = get_current_interpreter().disassemble
    analyzeChanges = get_current_interpreter().analyzeChanges
    setBytes = get_current_interpreter().setBytes

    # Python3 specific type definitions
    # The goal is to support both languages with a single codebase
    long = int  # There is no "long" in Python3

    class unicode:
        """A fake stub class, to keep type-checker relatively happy"""

        # Nothing in this branch creates instances of this class, so
        # `isinstance(x, unicode)` checks elsewhere in the file are False
        # for ordinary values.
        def encode(self):  # type: () -> str
            """A fake method, to keep type-checker relatively happy"""
            raise NotImplementedError("This method should never be called")

    from jpype import JArray

    def _is_array(value):  # type: (object) -> bool
        """Return True if the given value is a Java array"""
        return isinstance(value, JArray)

    def _bytes_as_list(value):  # type: (bytes) -> list[int]
        """Convert the given string to a list of bytes (no-op in py3)"""
        return value  # type: ignore

    def _bytes_from_bytelist(bytelist):  # type: (object) -> bytes
        """Convert the given Java array of bytes to python bytes"""
        return bytes(bytelist)  # type: ignore

    def _asbytes(value):  # type: (str) -> bytes
        """Convert the given string to bytes.

        Ugly hack in python3 - This converts chr(N) to byte N for every N."""
        return value.encode("latin1")

    def _unhex(s):  # type: (str) -> bytes
        """Decode hexadecimal string to bytes. Internal helper."""
        return bytes.fromhex(s)

    def _enhex(s):  # type: (bytes) -> str
        """Encode bytes to hexadecimal string. Internal helper."""
        return s.hex()
# Early Python2.x aliases
if TYPE_CHECKING:
    # Python 2.x archaism.
    # Only executed when TYPE_CHECKING is true, so that the names `long` and
    # `unicode` used throughout this file are defined in that case.
    long = int
    unicode = str  # type: ignore
class JavaObject:
    """Placeholder type that stands in for wrapped Java objects in type hints."""

    def __getattribute__(self, name):  # type: (str) -> Any
        """Accept any attribute name and yield None (exists to make mypy happy)."""
        return None
def _python_int(value):
    """Convert a given int-like value to a Python integer.

    This is a jpype helper: Java BigInteger values are turned into Python
    ints, anything else is passed through untouched.

    :param value: The value to convert, either a Python int or BigInteger.
    :return: A Python int for BigInteger input, otherwise `value` itself."""
    if not isinstance(value, BigInteger):
        return value
    # The conversion goes through the BigInteger's string representation.
    as_text = value.toString()
    return int(as_text)
def _python_str(string):  # type: (str|unicode) -> str
    """Convert a given string-like value to a Python `str`.

    `unicode` objects are encoded with their default `.encode()`; every other
    value is returned as-is. This only matters for Jython (Python 2)
    compatibility.

    :param string: A `str` or `unicode` value.
    :return: The encoded string for `unicode` input, otherwise `string`."""
    needs_encoding = isinstance(string, unicode)
    return string.encode() if needs_encoding else string
class GhidraWrapper(object):
    """The base class for all Ghidra wrappers.

    This class tries to be as transparent as possible - for example, it will
    not raise an error on double-wrapping, and wrappers can be compared with
    (and hashed like) the Java object they wrap.

    >>> instr = getInstructionAt(getAddr(0x1234))
    >>> GhidraWrapper(instr)
    <Instruction 0x1234>
    >>> GhidraWrapper(GhidraWrapper(instr))
    <Instruction 0x1234>
    >>> getInstructionBefore(Instruction(instr))
    <Instruction 0x1233>

    Equality, inequality and hashing are based on the underlying Java object."""

    def __init__(self, raw):  # type: (JavaObject|int|str|GhidraWrapper) -> None
        """Initialize the wrapper.

        This function will try to resolve the given object to a Ghidra object.
        The algorithm is as follows:

        * If "raw" is a primitive type (int, long, str, unicode, Address),
          try to resolve it with a static "get" method of the subclass.
        * If "raw" is a GhidraWrapper, unwrap it (so GhidraWrapper(GhidraWrapper(x))
          is always the same as GhidraWrapper(x)).
        * If "raw" is None at this point, raise an exception.
        * If the subclass has attribute UNDERLYING_CLASS, check that the wrapped
          object is an instance of it.
        * Save the final "raw" value.

        :param raw: The object to wrap (or a primitive to look up).
        :raises RuntimeError: if a primitive cannot be resolved, if the result
          is None, or if the object is not an instance of UNDERLYING_CLASS."""
        if isinstance(raw, (int, long, str, unicode, GenericAddress)):
            # Someone passed a primitive type to us.
            # If possible, try to resolve it with a "get" method.
            if hasattr(self, "get"):
                new_raw = self.get(raw)  # type: ignore
                if new_raw is None:
                    # Show original data for better error messages
                    raise RuntimeError("Unable to wrap " + str(raw))
                raw = new_raw
            else:
                raise RuntimeError("Unable to wrap a primitive: " + str(raw))

        while isinstance(raw, GhidraWrapper):
            # In case someone tries to Function(Function("main")) us
            raw = raw.raw

        if raw is None:
            raise RuntimeError("Object doesn't exist (refusing to wrap None)")

        # TODO - remove the conditional checks and implement this everywhere
        if hasattr(self, "UNDERLYING_CLASS"):
            wrapped_type = getattr(self, "UNDERLYING_CLASS")
            if not isinstance(raw, wrapped_type):
                raise RuntimeError(
                    "You are trying to wrap {} as {}".format(
                        raw.__class__.__name__, self.__class__.__name__
                    )
                )

        def _java_cast(raw):  # type: (Any) -> JavaObject
            """This function exists only to make type-checker happy"""
            return raw

        self.raw = _java_cast(raw)

    def __str__(self):  # type: () -> str
        """Return a string representation of this object.

        This just forwards the call to the underlying object."""
        return self.raw.__str__()

    def __repr__(self):  # type: () -> str
        """Return a string representation of this object.

        This just forwards the call to the underlying object."""
        return self.raw.__repr__()

    def __tojava__(self, klass):
        """Make it possible to pass this object to Java methods.

        This only works in Jython, I didn't find a way to do this in JPype yet."""
        return self.raw

    def __hash__(self):  # type: () -> int
        """Return the hash of this object.

        This just forwards the call to the underlying object's hashCode()."""
        return self.raw.hashCode()

    def __eq__(self, other):  # type: (object) -> bool
        """Check if this object is equal to another.

        Another wrapper is unwrapped first; the comparison itself is done by
        the underlying object's equals()."""
        if isinstance(other, GhidraWrapper):
            return self.raw.equals(other.raw)
        return self.raw.equals(other)

    def __ne__(self, other):  # type: (object) -> bool
        """Check if this object differs from another.

        Python 2 (Jython) does not derive != from __eq__, so it has to be
        defined explicitly to keep `a != b` consistent with `a == b`."""
        return not self.__eq__(other)
# Aliases just for typechecking.
if TYPE_CHECKING:
    Addr = GenericAddress | int | str
    # This library accepts one of three things as addresses:
    # 1. A Ghidra Address object
    # 2. An integer representing an address
    # 3. A string representing a symbol name
    # When returning a value, the address is always returned as an integer.
    Reg = GhRegister | str
    # This library accepts one of two things as registers:
    # 1. A Ghidra Register object
    # 2. A string representing a register name
    DataT = GhidraWrapper | JavaObject | str
    # This library accepts one of these things as a DataType:
    # 1. A Ghidra DataType object (wrapped or raw)
    # 2. A string representing a DataType name (will be resolved)

# For isinstance checks, so i can forget about this distinction once again
# (a tuple of all string-like types, usable directly as isinstance()'s 2nd arg)
Str = (str, bytes, unicode)

# Use this color for highlight by default - it should work with any theme.
HIGHLIGHT_COLOR = SearchConstants.SEARCH_HIGHLIGHT_COLOR  # type: Color
def resolve(addr):  # type: (Addr) -> GenericAddress
    """Convert an arbitrary addressable value to a Ghidra Address object.

    Three kinds of values are accepted:

    1. A Ghidra Address object - returned unchanged.
    2. An integer - formatted as hex and passed to `toAddr`.
    3. A string - treated as a symbol name; the symbol's address is used.

    >>> resolve(0x1234)
    0x1234
    >>> resolve("main")
    0x1234
    >>> resolve(toAddr(0x1234))
    0x1234

    :param addr: An addressable value.
    :return: A GenericAddress object representing the passed address.
    :raises TypeError: if `addr` is none of the supported types.
    """
    # Why, Ghidra? - unicode input is encoded to a plain str first.
    target = addr.encode() if isinstance(addr, unicode) else addr

    if isinstance(target, GenericAddress):
        return target
    if isinstance(target, str):
        return toAddr(Symbol(target).address)
    if not isinstance(target, (int, long)):
        raise TypeError("Address must be a ghidra Address, int, or str")
    # Why convert to string? Java cannot handle large (unsigned) integers :/
    return toAddr("{:x}".format(target))
def try_resolve(addr):  # type: (Addr) -> GenericAddress | None
    """Like `resolve`, but report failure with None instead of raising.

    See `resolve` documentation for the accepted kinds of values.

    :param addr: An addressable value.
    :return: A GenericAddress representing the value, or None if resolving
      failed for any reason."""
    resolved = None
    try:
        resolved = resolve(addr)
    except:  # noqa: E722
        # Deliberately catches everything: this is a best-effort helper.
        # NOTE(review): narrowing this may change which errors are swallowed
        # under Jython (Java exceptions) - confirm before tightening.
        pass
    return resolved
def can_resolve(addr):  # type: (Addr) -> bool
    """Check if a passed value has a type that `resolve()` accepts.

    Only the type is inspected (Address, int, long, unicode or str); whether
    e.g. a symbol with the given name exists is not checked here.
    See `resolve` documentation for more details."""
    accepted_types = (GenericAddress, int, long, unicode, str)
    return isinstance(addr, accepted_types)
def unwrap(wrapper_or_java_type):  # type: (JavaObject|GhidraWrapper) -> JavaObject
    """Return the underlying Java object of a GhidraWrapper.

    Anything that is not a GhidraWrapper is returned unchanged."""
    is_wrapper = isinstance(wrapper_or_java_type, GhidraWrapper)
    return wrapper_or_java_type.raw if is_wrapper else wrapper_or_java_type
def collect_iterator(iterator):  # type: (JavaObject) -> list
    """Drain a Java-style iterator (hasNext()/next()) into a Python list.

    :param iterator: An object exposing `hasNext()` and `next()`.
    :return: All remaining elements, in iteration order."""
    items = []
    push = items.append
    while True:
        if not iterator.hasNext():
            return items
        push(iterator.next())
if TYPE_CHECKING:
    # Hacky workaround to have a optional generic type variable using comment syntax.
    T = TypeVar("T")
    GenericT = Generic[T]
else:
    # When TYPE_CHECKING is false, an empty class takes the place of
    # Generic[T], so classes can still list GenericT as a base.
    class GenericT:
        pass
class Graph(GenericT, GhidraWrapper):
    """Wraps a Ghidra AttributedGraph object.

    We'd like to store arbitrary objects in the graph, but it only supports
    strings for keys (and names). Objects are therefore converted to string
    IDs with _get_unique_string(), and the original objects are remembered in
    `self.data` (ID -> object) so they can be handed back to the caller."""

    # TODO: maybe this should be a GDirectedGraph, so we get some algorithms
    # for free, and we can just convert it for display.

    def __init__(self, raw):  # type: (AttributedGraph) -> None
        """Create a new Graph wrapper.

        We have to keep track of additional data, since AttributedGraph is a bit
        clunky and can only store string IDs and string values.

        :param raw: The AttributedGraph object to wrap."""
        GhidraWrapper.__init__(self, raw)
        # vertex ID (string) -> object originally passed to vertex()
        self.data = {}

    @staticmethod
    def create(name=None, description=None):  # type: (str|None, str|None) -> Graph[Any]
        """Create a new Graph.

        :param name: The name of the graph. If None, a default name will be used.
        :param description: The description of the graph. If
          None, a default description will be used.
        :returns: a new Graph object.
        """
        name = name or "Graph"
        description = description or "Graph"
        graphtype = GraphType(name, description, ArrayList([]), ArrayList([]))
        return Graph(AttributedGraph(name, graphtype, description))

    @staticmethod
    def construct(
        vertexlist, getedges
    ):  # type: (list[T], Callable[[T], list[T]]) -> Graph[T]
        """Create a new Graph from a list of vertices and a function to get edges.

        Edges whose destination is not one of the given vertices are skipped.

        :param vertexlist: The list of vertices.
        :param getedges: A function that gets a list of destinations from a vertex.
        :returns: the newly built Graph."""
        g = Graph.create()
        # Add all vertices first, so membership can be tested while adding edges.
        for v in vertexlist:
            g.vertex(v)
        for v in vertexlist:
            for dest in getedges(v):
                if dest in g:
                    g.edge(v, dest)
        return g

    def __contains__(self, vtx):  # type: (T) -> bool
        """Check if a given vertex exists in this graph.

        :param vtx: The ID of the vertex to check."""
        vid = _get_unique_string(vtx)
        vobj = self.raw.getVertex(vid)
        return self.raw.containsVertex(vobj)

    def has_vertex(self, vtx):  # type: (T) -> bool
        """Check if a given vertex exists in this graph.

        :param vtx: The ID of the vertex to check."""
        return vtx in self

    def vertex(self, vtx, name=None):  # type: (T, str|None) -> T
        """Get or create a vertex in this graph.

        :param vtx: The ID of the new vertex, or any "Vertexable" object
          that can be used to identify the vertex.
        :param name: The name of the vertex. If not provided,
          str(vtx) will be used as the name.
        :returns: vtx parameter is returned"""
        vid = _get_unique_string(vtx)
        name = name or str(vtx)
        self.raw.addVertex(vid, name)
        self.data[vid] = vtx
        return vtx

    def edge(self, src, dst):  # type: (T, T) -> None
        """Create an edge between two vertices in this graph.

        :param src: The source vertex ID.
        :param dst: The destination vertex ID."""
        srcobj = self.raw.getVertex(_get_unique_string(src))
        dstobj = self.raw.getVertex(_get_unique_string(dst))
        self.raw.addEdge(srcobj, dstobj)

    @property
    def vertices(self):  # type: () -> list[T]
        """Get all vertices in this graph.

        Warning: this constructs the list every time, so it's not a light operation.
        Use vertex_count for counting."""
        return [self.__resolve(vid.getId()) for vid in self.raw.vertexSet()]

    @property
    def vertex_count(self):  # type: () -> int
        """Return the number of vertices in this graph."""
        return self.raw.vertexSet().size()

    def __len__(self):  # type: () -> int
        """Return the number of vertices in this graph.

        To get the number of edges, use edge_count."""
        return self.vertex_count

    @property
    def edges(self):  # type: () -> list[tuple[T, T]]
        """Get all edges in this graph, as (source, target) pairs.

        Endpoints are translated back to the objects passed to vertex(), the
        same way `vertices` does it (self.data is keyed by vertex ID, so the
        lookup has to go through the vertex's ID, not the vertex itself).

        Warning: this constructs the list every time, so it's not a light operation.
        Use edge_count for counting."""
        result = []
        for e in self.raw.edgeSet():
            frm = self.raw.getEdgeSource(e)
            to = self.raw.getEdgeTarget(e)
            result.append((self.__resolve(frm.getId()), self.__resolve(to.getId())))
        return result

    @property
    def edge_count(self):  # type: () -> int
        """Return the number of edges in this graph."""
        return self.raw.edgeSet().size()

    @property
    def name(self):  # type: () -> str
        """Return the name of this graph."""
        return self.raw.getName()

    @property
    def description(self):  # type: () -> str
        """Return the description of this graph."""
        return self.raw.getDescription()

    def to_dot(self):  # type: () -> str
        """Return a DOT representation of this graph."""
        result = []
        result.append("digraph {} {{".format(self.name))
        for v in self.raw.vertexSet():
            result.append(' "{}" [label="{}"];'.format(v.getId(), v.getName()))
        for e in self.raw.edgeSet():
            frm = self.raw.getEdgeSource(e)
            to = self.raw.getEdgeTarget(e)
            result.append(' "{}" -> "{}";'.format(frm.getId(), to.getId()))
        result.append("}")
        return "\n".join(result)

    def show(self):  # type: () -> None
        """Display this graph in the Ghidra GUI."""
        graphtype = self.raw.getGraphType()
        description = graphtype.getDescription()
        options = GraphDisplayOptions(graphtype)
        broker = getState().tool.getService(GraphDisplayBroker)
        display = broker.getDefaultGraphDisplay(False, getMonitor())
        display.setGraph(self.raw, options, description, False, getMonitor())

    def __resolve(self, vid):  # type: (str) -> T
        """Resolve a vertex ID to a vertex object.

        Unknown IDs are returned as-is.

        :param vid: The ID of the vertex to resolve."""
        if vid in self.data:
            return self.data[vid]
        return vid  # type: ignore graph created outside of ghidralib?

    def dfs(
        self, origin, callback=lambda _: None
    ):  # type: (T, Callable[[T], None]) -> dict[T, T|None]
        """Perform a depth-first search on this graph, starting from the given vertex.

        The callback will be called for each vertex visited when first visited, and
        the returned value is a dictionary of parent vertices for each visited vertex.

        >>> g = Graph.create()
        >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
        >>> g.edge(a, b)
        >>> g.edge(b, c)
        >>> g.dfs(a)
        {'a': None, 'b': 'a', 'c': 'b'}

        Warning: This won't reach every node in the graph, if it's not connected.

        :param origin: The ID of the vertex to start the search from.
        :param callback: A callback function to call for each vertex visited.
        :returns: A dictionary of parent vertices for each visited vertex.
        """
        # Stack of (parent object, vertex ID) pairs still to be processed.
        tovisit = [(None, _get_unique_string(origin))]
        visited = set()
        parents = {origin: None}  # type: dict[T, T|None]
        while tovisit:
            parent, vid = tovisit.pop()
            if vid in visited:
                continue
            visited.add(vid)
            vobj = self.__resolve(vid)
            parents[vobj] = parent
            callback(vobj)
            for edge in self.raw.edgesOf(self.raw.getVertex(vid)):
                tovisit.append((vobj, self.raw.getEdgeTarget(edge).getId()))
        return parents

    def toposort(self, origin):  # type: (T) -> list[T]
        """Perform a topological sort on this graph, starting from the given vertex.

        The result is a post-order listing: a vertex is appended only after
        every vertex reached through its edges has been appended. So if there
        is an edge from A to B (and no cycle), then B will come before A in
        the list. If every vertex is reachable from "origin", then "origin"
        will be the last element in the list; vertices not reached from
        "origin" are processed afterwards and appended after it.

        On a practical example, for a call graph, this means that if A calls B, then
        B will be before A in the list - so if you want to process from the bottom up,
        you should use the entry point of the program as the origin. In the example
        below, the entry point is "a", "a" calls "b", and "b" calls "c":

        >>> g = Graph.create()
        >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
        >>> g.edge(a, b)
        >>> g.edge(b, c)
        >>> g.toposort(a)
        ['c', 'b', 'a']

        :param origin: The ID of the origin vertex to start the sort from.
        :returns: a list of vertices in the order described above."""
        visited = set()
        result = []

        def dfs(vid):
            # Recursive post-order walk over vertex IDs.
            visited.add(vid)
            for edge in self.raw.edgesOf(self.raw.getVertex(vid)):
                target = self.raw.getEdgeTarget(edge)
                if target.getId() not in visited:
                    dfs(target.getId())
            result.append(self.__resolve(vid))

        dfs(_get_unique_string(origin))
        # Pick up whatever the walk from origin did not reach.
        for vid in self.raw.vertexSet():
            if vid.getId() not in visited:
                dfs(vid.getId())
        return result

    def bfs(
        self, origin, callback=lambda _: None
    ):  # type: (T, Callable[[T], None]) -> dict[T, T|None]
        """Perform a breadth-first search on this graph, starting from the given vertex.

        The callback will be called for each vertex visited when first visited, and
        the returned value is a dictionary of parent vertices for each visited vertex.

        >>> g = Graph.create()
        >>> a, b, c = g.vertex("a"), g.vertex("b"), g.vertex("c")
        >>> g.edge(a, b)
        >>> g.edge(b, c)
        >>> g.bfs(a)
        {'a': None, 'b': 'a', 'c': 'b'}

        Warning: This won't reach every node in the graph, if it's not connected.

        :param origin: The ID of the vertex to start the search from.
        :param callback: A callback function to call for each vertex visited.
        :returns: A dictionary of parent vertices for each visited vertex.
        """
        # Local import: deque gives O(1) pops from the front of the queue
        # (list.pop(0) shifts every remaining element).
        from collections import deque

        tovisit = deque([(None, _get_unique_string(origin))])
        visited = set()
        parents = {origin: None}  # type: dict[T, T|None]
        while tovisit:
            parent, vid = tovisit.popleft()
            if vid in visited:
                continue
            visited.add(vid)
            vobj = self.__resolve(vid)
            parents[vobj] = parent
            callback(vobj)
            for edge in self.raw.edgesOf(self.raw.getVertex(vid)):
                tovisit.append((vobj, self.raw.getEdgeTarget(edge).getId()))
        return parents
class BodyTrait:
    """A trait for objects that have a body.

    A "body" is the set of program addresses assigned to the object. Any
    class exposing a `body` property gets the generic helpers below
    (currently: highlighting) for free."""

    @property
    @abstractmethod
    def body(self):  # type: () -> AddressSet
        """The body of this object"""

    def highlight(self, color=HIGHLIGHT_COLOR):  # type: (Color) -> None
        """Highlight this object's body in the listing.

        :param color: The color to use; defaults to HIGHLIGHT_COLOR."""
        region = self.body
        region.highlight(color)

    def unhighlight(self):  # type: () -> None
        """Clear the highlight from this object's body."""
        region = self.body
        region.unhighlight()
class HighVariable(GhidraWrapper):
    """Wrapper around a raw high-variable object.

    The flag-style properties below are computed from the flags of the
    individual varnodes returned by `varnodes`."""

    @property
    def symbol(self):  # type: () -> HighSymbol
        """Return the HighSymbol of this variable (from the raw getSymbol())."""
        return HighSymbol(self.raw.getSymbol())

    def rename(self, new_name):  # type: (str) -> None
        """Rename this high variable (delegates to its symbol).

        :param new_name: The new name."""
        self.symbol.rename(new_name)

    @property
    def size(self):  # type: () -> int
        """Return the size of this variable in bytes"""
        return self.raw.getSize()

    @property
    def data_type(self):  # type: () -> DataType
        """Return the data type of this variable"""
        return DataType(self.raw.getDataType())

    @property
    def name(self):  # type: () -> str
        """Return the name of this variable"""
        return self.raw.getName()

    @property
    def varnode(self):  # type: () -> Varnode
        """Return the Varnode that represents this variable"""
        return Varnode(self.raw.getRepresentative())

    @property
    def varnodes(self):  # type: () -> list[Varnode]
        """Return all Varnodes that represent this variable at some point"""
        return [Varnode(vn) for vn in self.raw.getInstances()]

    # NOTE(review): the four properties below were previously documented as
    # "ALL varnodes", but they are implemented with any(); the documentation
    # now describes the implemented behavior. Only is_free uses all().

    @property
    def is_unaffected(self):  # type: () -> bool
        """Return True if ANY varnode of this variable is unaffected."""
        return any(vn.is_unaffected for vn in self.varnodes)

    @property
    def is_persistent(self):  # type: () -> bool
        """Return True if ANY varnode of this variable is persistent."""
        return any(vn.is_persistent for vn in self.varnodes)

    @property
    def is_addr_tied(self):  # type: () -> bool
        """Return True if ANY varnode of this variable is addr tied."""
        return any(vn.is_addr_tied for vn in self.varnodes)

    @property
    def is_input(self):  # type: () -> bool
        """Return True if ANY varnode of this variable is an input."""
        return any(vn.is_input for vn in self.varnodes)

    @property
    def is_free(self):  # type: () -> bool
        """Return True if ALL varnodes of this variable are free."""
        return all(vn.is_free for vn in self.varnodes)
class HighSymbol(GhidraWrapper):
    """Wrapper around a raw high-symbol object."""

    def rename(
        self, new_name, source=SourceType.USER_DEFINED
    ):  # type: (str, SourceType) -> None
        """Rename this high symbol.

        The change is written through HighFunctionDBUtil.updateDBVariable,
        with None passed in the data type position.

        :param new_name: The new name of the symbol
        :param source: The source of the symbol"""
        no_datatype = None
        HighFunctionDBUtil.updateDBVariable(self.raw, new_name, no_datatype, source)

    @property
    def size(self):  # type: () -> int
        """Return the size of this symbol in bytes"""
        raw_symbol = self.raw
        return raw_symbol.getSize()

    @property
    def data_type(self):  # type: () -> DataType
        """Return the data type of this symbol"""
        raw_type = self.raw.getDataType()
        return DataType(raw_type)

    @property
    def variable(self):  # type: () -> HighVariable|None
        """Return the high variable associated with this symbol, if any.

        The symbol may have multiple HighVariables associated with it.
        This method returns the biggest one."""
        high_var = self.raw.getHighVariable()
        return None if high_var is None else HighVariable(high_var)

    @property
    def name(self):  # type: () -> str
        """Return the name of this symbol"""
        raw_symbol = self.raw
        return raw_symbol.getName()

    @property
    def symbol(self):  # type: () -> Symbol|None
        """Get the corresponding symbol, if it exists (None otherwise)."""
        plain_symbol = self.raw.getSymbol()
        return None if plain_symbol is None else Symbol(plain_symbol)

    @property
    def is_this_pointer(self):  # type: () -> bool
        """Return True if this symbol is a "this" pointer for a class"""
        raw_symbol = self.raw
        return raw_symbol.isThisPointer()
class Register(GhidraWrapper):
    """Wrapper around a raw register object."""

    @staticmethod
    def get(raw_or_name):  # type: (str|JavaObject) -> Register|None
        """Get a register by name.

        String-like input is looked up in the current program's language;
        anything else is wrapped directly.

        :param raw_or_name: A register name, or a raw register object.
        :return: The Register, or None if there is nothing to wrap."""
        if isinstance(raw_or_name, Str):
            language = Program.current().getLanguage()
            raw_or_name = language.getRegister(raw_or_name)
        return None if raw_or_name is None else Register(raw_or_name)

    @property
    def name(self):  # type: () -> str
        """Return the name of this register"""
        raw_register = self.raw
        return raw_register.getName()

    @property
    def size(self):  # type: () -> int
        """Return the size of this register in bytes

        This will tell the total number of bytes this register contains -
        because register values don't have to be byte-aligned"""
        raw_register = self.raw
        return raw_register.getNumBytes()

    @property
    def varnode(self):  # type: () -> Varnode
        """Return the varnode associated with this register

        The varnode is built from the register's address and its byte count.

        Warning: this doesn't support registers that are not byte-aligned
        (for example, flag registers). It will round the address down to byte.
        """
        location = self.raw.getAddress()
        width = self.raw.getNumBytes()
        return Varnode(GhVarnode(location, width))
class Varnode(GhidraWrapper):
@property
def has_value(self):  # type: () -> bool
    """Return true if this varnode can be converted to a integer value.

    In particular, this will return true for Address and Constant varnodes.
    It is simply a check that `value` is not None."""
    resolved = self.value
    return resolved is not None
@property
def value(self):  # type: () -> int|None
    """Get the value of this varnode. Traverse defining pcodeops if necessary.

    Address and constant varnodes yield their offset. Otherwise the result
    of the defining pcodeop is used, or None when there is no such op."""
    if not (self.is_address or self.is_constant):
        if self.defining_pcodeop is None:
            return None
        return self.defining_pcodeop.result
    return self.offset
@property
def offset(self):  # type: () -> int
    """Return the raw varnode's offset, converted with int()."""
    raw_offset = self.raw.getOffset()
    return int(raw_offset)
@property
def size(self):  # type: () -> int
    """Return the size reported by the raw varnode's getSize()."""
    raw_varnode = self.raw
    return raw_varnode.getSize()
@property
def high(self):  # type: () -> HighVariable
    """Return the HighVariable wrapping the raw varnode's getHigh() result."""
    raw_high = self.raw.getHigh()
    return HighVariable(raw_high)
@property
def symbol(self):  # type: () -> HighSymbol
    """Return the HighSymbol of this varnode's high variable."""
    high_variable = self.high
    return high_variable.symbol
@property
def is_constant(self):  # type: () -> bool
    """Return True if the raw varnode reports itself as a constant.

    Note: addresses are not constants in Ghidra-speak.
    Use has_value to check if the varnode has a predictable value."""
    raw_varnode = self.raw
    return raw_varnode.isConstant()
@property
def is_register(self):  # type: () -> bool
    """Return True if this varnode is stored entirely in a register.

    Warning: this does not mean that it can be cast to a register! This may
    be, for example, upper 32 bits of RAX. Use is_named_register instead."""
    raw_varnode = self.raw
    return raw_varnode.isRegister()
@property
def is_named_register(self):  # type: () -> bool
    """Return True if this varnode is stored entirely in a named register.

    "Named" in this context means that it has a conventional name, like RAX.
    Not all register varnodes are named, for example, the upper 32 bits of RAX
    have no commonly used name.

    The check asks the current program's language for a register at this
    varnode's address and size."""
    lang = Program.current().getLanguage()
    match = lang.getRegister(self.raw.getAddress(), self.size)
    return match is not None
@property
def as_register(self):  # type: () -> str
    """Return the name of the register this varnode is stored in.

    Warning: even if is_register returns true, this does not mean you can use
    this method safely. Use is_named_register to make sure: when the language
    has no register for this address and size, the lookup result is None and
    this method fails."""
    lang = Program.current().getLanguage()
    return lang.getRegister(self.raw.getAddress(), self.size).getName()
@property
def is_address(self):  # type: () -> bool
    """Return the raw varnode's isAddress() flag."""
    raw_varnode = self.raw
    return raw_varnode.isAddress()
@property
def is_unique(self):  # type: () -> bool
    """Return the raw varnode's isUnique() flag."""
    raw_varnode = self.raw
    return raw_varnode.isUnique()
@property
def is_hash(self):  # type: () -> bool
    """Return the raw varnode's isHash() flag."""
    raw_varnode = self.raw
    return raw_varnode.isHash()
@property
def is_stack(self):  # type: () -> bool
    """Return True if this varnode's address space is of the stack type.

    The space ID from getSpace() is masked with AddressSpace.ID_TYPE_MASK
    and compared with AddressSpace.TYPE_STACK."""
    type_bits = AddressSpace.ID_TYPE_MASK & self.raw.getSpace()
    return type_bits == AddressSpace.TYPE_STACK
def rename(self, new_name):  # type: (str) -> None
    """Try to rename the current varnode. This only makes sense for variables.

    :param new_name: The new name, passed on to this varnode's symbol."""
    target_symbol = self.symbol
    target_symbol.rename(new_name)
@property
def free(self):  # type: () -> Varnode
    """Return a new Varnode built from only this varnode's address and size."""
    location = self.raw.getAddress()
    width = self.raw.getSize()
    return Varnode(GhVarnode(location, width))
@property
def simple(self): # type: () -> int|str
"""Convert Varnode to a primitive value (int or a string representation)
More specifically, this will convert constants and addresses into integers,
for registers names are returned, and for unique and hash varnodes ad-hoc
string encoding is used (hash:ID or uniq:ID where ID is varnode identifier).
This is useful for simple analyses when programmer already knows what
type of value is expected at the given position."""
value = self.value
if value is not None:
return value
elif self.is_register:
if self.is_named_register:
return self.as_register
return "reg:{:x}:{:x}".format(self.offset, self.size)
elif self.is_unique:
return "uniq:{:x}:{:x}".format(self.offset, self.size)
elif self.is_hash:
return "hash:{:x}:{:x}".format(self.offset, self.size)
elif self.is_stack: