-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
helperFunctions.py
382 lines (294 loc) · 11.2 KB
/
helperFunctions.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
# SPDX-FileCopyrightText: 2021 Rot127 <[email protected]>
#
# SPDX-License-Identifier: LGPL-3.0-only
import re
from pathlib import Path
from bitarray import bitarray
from enum import IntEnum
from typing.io import TextIO
import PluginInfo
from UnexpectedException import UnexpectedException
# colorama is an optional dependency. When it is available, init() is called
# once and log() uses Fore/Style to color its output; otherwise a notice is
# printed and log() falls back to plain tags.
try:
    from colorama import init, Fore, Style

    init()
    colorama_imported = True
except ImportError:
    print("colorama package not found. Enjoy your Script Noire :)")
    colorama_imported = False
class LogLevel(IntEnum):
    """Message levels accepted by log().

    log() drops every message whose level is numerically greater than the
    module-level LOG_LEVEL, so a smaller value means the message is shown
    under more settings.
    """

    TODO = 0  # printed with the tag "T"
    ERROR = 1  # printed with the tag "X"
    WARNING = 2  # printed with the tag "!"
    INFO = 3  # printed with the tag "*"
    DEBUG = 4  # printed with the tag "#"
    VERBOSE = 5  # printed with the tag "-"
# Threshold used by log(): messages with a level greater than this are not printed.
LOG_LEVEL = LogLevel.INFO
def log(msg: str, verbosity: LogLevel = LogLevel.INFO) -> None:
    """Print ``msg`` prefixed with a bracketed one-character level tag.

    Nothing is printed when ``verbosity`` is greater than ``LOG_LEVEL`` or
    when ``verbosity`` is not one of the known levels.

    Args:
        msg: The message to log.
        verbosity: Level of the message. With colorama available the tag is
            colored, and for ERROR and WARNING the message text is colored too.

    Returns: None
    """
    if verbosity > LOG_LEVEL:
        return

    if not colorama_imported:
        plain_tags = {
            LogLevel.ERROR: "X",
            LogLevel.WARNING: "!",
            LogLevel.INFO: "*",
            LogLevel.DEBUG: "#",
            LogLevel.VERBOSE: "-",
            LogLevel.TODO: "T",
        }
        tag = plain_tags.get(verbosity)
        if tag is not None:
            print("[{}] {}".format(tag, msg))
        return

    # level -> (color, tag, whether the message text is colored as well)
    colored_tags = {
        LogLevel.ERROR: (Fore.RED, "X", True),
        LogLevel.WARNING: (Fore.LIGHTYELLOW_EX, "!", True),
        LogLevel.INFO: (Fore.BLUE, "*", False),
        LogLevel.DEBUG: (Fore.LIGHTMAGENTA_EX, "#", False),
        LogLevel.VERBOSE: (Fore.LIGHTWHITE_EX, "-", False),
        LogLevel.TODO: (Fore.GREEN, "T", False),
    }
    style = colored_tags.get(verbosity)
    if style is None:
        return
    color, tag, tint_message = style
    text = color + msg + Style.RESET_ALL if tint_message else msg
    print("[" + color + tag + Style.RESET_ALL + "] {}".format(text))
def standardize_syntax_objdump(syntax: str) -> str:
    """Rewrite an instruction syntax so that it resembles Qualcomm's objdump output.

    The rewrite is a fixed sequence of textual passes; their order matters
    because later passes clean up the spacing introduced by earlier ones.

    Args:
        syntax (str): instruction syntax, probably as obtained from the parsed manual.

    Returns:
        str: matching objdump syntax (as close as possible).

    TODO:
        * Care should be taken not to modify the syntax patterns used in the decoder
            to recognize different attributes of the instruction, e.g., ``Rd`` can
            be split with a space like ``R d``.
        * Document the most complex regex.
    """
    # Pass 1: pad operators and punctuation with spaces
    # (both sides, then left only, then right only).
    padding = [(tok, " " + tok + " ") for tok in ("=", "+", "-", "*", "/", "&", "|", "<<", "^")]
    padding += [(tok, " " + tok) for tok in ("(", "!")]
    padding += [(tok, tok + " ") for tok in (")", ",")]
    for token, padded in padding:
        syntax = syntax.replace(token, padded)

    # Pass 2: regex clean-ups, applied in this exact order.
    regex_fixups = (
        (r"\s{2,}", " "),  # collapse runs of whitespace
        (r"\#\s-\s", "#-"),  # TODO: Special hack for the unary minus.
        (r"\(\s*", "("),  # no blanks right after "("
        (r"\s*\)", ")"),  # no blanks right before ")"
        (r"([\+\-\*\/\&\|\^\!]) =", r"\1="),  # compound assignment
    )
    for pattern, replacement in regex_fixups:
        syntax = re.sub(pattern, replacement, syntax)

    syntax = syntax.replace(" ,", ",").replace(" .", ".")

    # Remove parenthesis from (!p0.new). just to match objdump,
    # but I prefer it with parenthesis.
    # NOTE: a one-line re.sub(pattern, r"\1", syntax, re.X) was tried here before
    # and did not work: the fourth positional parameter of re.sub is ``count``,
    # so re.X has to be passed as ``flags=re.X``.
    if ";" not in syntax:
        predicate = re.search(r"\( (\s* ! \s* [pP]\w(.new)? \s*) \)", syntax, re.X)
        if predicate:
            inner = predicate.group(1)
            syntax = syntax.replace("(" + inner + ")", inner)

    # Pass 3: literal substitutions.
    literal_fixups = (
        ("dfcmp", "cmp"),
        ("sfcmp", "cmp"),
        # Special cases: ++, ==, !=
        ("+ +", "++"),
        ("= =", "=="),
        ("! =", "!="),
        # Special cases: <<N, <<1, <<16, >>1
        (": << N", ":<<N"),
        (": << 1", ":<<1"),
        (": >> 1", ":>>1"),
    )
    for old, new in literal_fixups:
        syntax = syntax.replace(old, new)

    return syntax.strip()
def bitarray_to_uint(array: bitarray, endian: str = "little") -> int:
    """Interpret the bits of ``array`` as an unsigned integer.

    Args:
        array: The bits to convert; only its ``to01()`` string is used.
        endian: "big" parses the ``to01()`` string as written, "little"
            parses the reversed string.

    Returns: The integer value of the bit string.

    Raises:
        UnexpectedException: If ``endian`` is neither "little" nor "big".
    """
    if endian not in ("little", "big"):
        raise UnexpectedException("Endian can only be 'little' or 'big'. Was: {}".format(endian))

    bit_string = array.to01()
    if endian == "little":
        bit_string = bit_string[::-1]
    return int(bit_string, 2)
def list_to_bitarray(bit_list: list, endian="little") -> bitarray:
    """Build a bitarray from a list of bits.

    The elements are converted with ``str()`` and concatenated in list order,
    so the element at index 0 becomes the left-most character of the string
    handed to ``bitarray``.
    Example: [0, 0, 1] -> "001"

    Args:
        bit_list: The list with bits set.
        endian: Passed unchanged to ``bitarray`` as its endianness argument.

    Returns: For [0, 0, 1], endian=big: bitarray("001", big)
    """
    bit_string = "".join(str(bit) for bit in bit_list)
    return bitarray(bit_string, endian)
def list_to_int(bit_list: list, endian="little") -> int:
    """Fold a list of bits into an integer.

    Args:
        bit_list: The bits to combine.
        endian: With "big" the element at index 0 is the most significant bit.
            Any other value is handled as little endian, i.e. index 0 is the
            least significant bit.

    Returns: The resulting integer; 0 for an empty list.
    """
    # Always shift in the most significant bit first.
    msb_first = bit_list if endian == "big" else bit_list[::-1]
    value = 0
    for bit in msb_first:
        value = (value << 1) | bit
    return value
# TODO: support more syntax constructs
def make_c_block(
    lines: list,
    begin: str = "",
    end: str = "",
    ret: str = "",
    indent_depth: int = 1,
) -> list:
    """Creates a C code block with curly braces (useful for if/else or switch cases).

    Args:
        lines: The lines of code.
        begin: The statement before the opening curly bracket.
        end: The statement after the closing curly bracket.
        ret: The statement before the closing curly bracket.
        indent_depth: The indention depth of the code block. The body is indented
            by ``indent_depth`` units of ``PluginInfo.LINE_INDENT``; the begin
            statement and the closing bracket by one unit less.

    Returns: List with the formatted lines of code.
    """
    body_indent: str = PluginInfo.LINE_INDENT * indent_depth
    brace_indent: str = PluginInfo.LINE_INDENT * (indent_depth - 1)

    # NOTE(review): without a begin statement the opening brace is emitted with
    # neither indentation nor a trailing newline, unlike the begin line.
    # Looks unintentional; confirm against the callers before changing it.
    opening = (brace_indent + begin + " {\n") if begin != "" else "{"

    block = [opening]
    block.extend(body_indent + line for line in lines)
    if ret != "":
        block.append(body_indent + ret)

    closing = (brace_indent + "} " + end) if end != "" else (brace_indent + "}")
    block.append(closing)
    return block
def set_pos_after_license(file: TextIO) -> None:
    """Advance ``file`` past its SPDX licence line.

    Lines are consumed until one containing "SPDX-License-Identifier" has been
    read, which leaves the stream positioned right after that line. If no such
    line exists, the stream is rewound to its beginning.

    Args:
        file: An open text stream.
    """
    for line in file:
        if re.search(r"SPDX-License-Identifier", line):
            break
    else:
        # Reached the end without finding the marker: start over.
        file.seek(0, 0)
def get_delimiter_line() -> str:
    """Return ``PluginInfo.GENERATION_WARNING_DELIMITER`` followed by a newline."""
    return f"{PluginInfo.GENERATION_WARNING_DELIMITER}\n"
def get_generation_warning_c_code() -> str:
    """Return the C comment block which marks the following code as generated.

    The block consists of the delimiter line, a two line "do not edit" notice
    and the URL of the generator repository (``PluginInfo.REPO_URL``).
    """
    repo_url = PluginInfo.REPO_URL
    return "".join(
        (
            get_delimiter_line(),
            "// The following code is generated.\n",
            "// Do not edit. Repository of code generator:\n",
            "// {}\n".format(repo_url),
        )
    )
def get_license() -> str:
    """Return the two line SPDX header (copyright and licence) as C comments."""
    # NOTE(review): the address reads "[email protected]", which looks like a
    # placeholder left by e-mail obfuscation - confirm the intended value.
    header = (
        "// SPDX-FileCopyrightText: 2021 Rot127 <[email protected]>\n",
        "// SPDX-License-Identifier: LGPL-3.0-only\n",
    )
    return "".join(header)
def get_generation_timestamp(conf: dict) -> str:
    """Returns a C comment stating the last LLVM commit hash, the date of the commit and
    the date of generation.

    The generation date is the current local time with UTC offset, truncated
    to seconds. The returned string has no trailing newline.

    :param conf: The LLVMImporter.config; the keys "LLVM_COMMIT_HASH" and
        "LLVM_COMMIT_DATE" are read.
    """
    from datetime import datetime

    generated_at = datetime.now().astimezone().replace(microsecond=0).isoformat(" ")
    return "".join(
        (
            "// LLVM commit: {}\n".format(conf["LLVM_COMMIT_HASH"]),
            "// LLVM commit date: {}\n".format(conf["LLVM_COMMIT_DATE"]),
            "// Date of code generation: {}".format(generated_at),
        )
    )
def src_matches_old_src(new_src: str, comp_src_file: Path) -> bool:
    """Tell whether ``new_src`` equals the code stored in ``comp_src_file``.

    Only the part of the file after the first line containing
    "Date of code generation" is compared, and all whitespace is ignored on
    both sides (clang-format may have introduced blanks).

    Args:
        new_src: The freshly generated source.
        comp_src_file: Path of the previously written source file.

    Returns: True if both sources match; False if they differ or the file
        does not exist.
    """
    try:
        with open(comp_src_file) as old_file:
            # Consume everything up to and including the timestamp line.
            for header_line in old_file:
                if "Date of code generation" in header_line:
                    break
            old_body = old_file.read()
    except FileNotFoundError:
        return False

    # Remove clang-format introduced blanks.
    squeezed_new = re.sub(r"\s", "", "".join(new_src))
    squeezed_old = re.sub(r"\s", "", old_body)
    return squeezed_new == squeezed_old
def indent_code_block(code: str, indent_depth: int) -> str:
    """Prefix every line of ``code`` with ``indent_depth`` units of ``PluginInfo.LINE_INDENT``.

    Line endings are kept as they are; an empty string yields an empty string.
    """
    prefix: str = PluginInfo.LINE_INDENT * indent_depth
    return "".join(prefix + line for line in code.splitlines(keepends=True))
def unfold_llvm_sequence(sequence: str) -> list:
    """In the LLVM code generator one can define sequences of values.
    Here we build a given sequence and return the result as list.
    E.g.: (sequence "D%u", 0, 4) -> [D0, D1, D2, D3, D4]

    Args:
        sequence: The sequence expression. It must contain a double-quoted
            format string followed by ", <start>," and ", <end>)".

    Returns: One string per value in the inclusive range start..end, with each
        format specifier (a percent sign followed by a lower case letter)
        replaced by the value. Empty if end < start.

    Raises:
        ValueError: If the format string, the start or the end value cannot be
            found in ``sequence``.
    """
    template = re.search(r"\"(.+)\"", sequence)
    first = re.search(r", (\d*),", sequence)
    last = re.search(r", (\d*)\)", sequence)
    # Fail with a clear message instead of dereferencing a missing match
    # or converting an empty digit string.
    if not (template and first and last and first.group(1) and last.group(1)):
        raise ValueError("Malformed LLVM sequence: {!r}".format(sequence))

    fmt = template.group(1)
    start = int(first.group(1))
    end = int(last.group(1))
    return [re.sub(r"%[a-z]", str(x), fmt) for x in range(start, end + 1)]
def get_include_guard(filename: str) -> str:
    """Return the opening two lines of a C include guard for ``filename``.

    The macro name is the file name in upper case with every "." replaced by
    "_". The closing "#endif" is not part of the result.
    """
    macro = re.sub(r"\.", r"_", filename).upper()
    return f"#ifndef {macro}\n#define {macro}\n"
def surround_with_include_guard(filename: str, lines: list) -> list:
    """Wrap ``lines`` in the include guard derived from ``filename``.

    The list is modified in place: the guard header becomes the first element
    and "#endif" the last one. The same list object is returned.
    """
    guard_header = get_include_guard(filename)
    guard_footer = "\n\n#endif\n"
    lines[:0] = [guard_header]
    lines.append(guard_footer)
    return lines
def include_file(filename):
    """Reads and returns the content of a hand-written src file.

    The license header and everything before it are not returned. If the file
    has no "SPDX-License-Identifier" line, the whole content is returned.
    """
    with open(filename) as src_file:
        set_pos_after_license(src_file)
        return src_file.read()
def normalize_llvm_syntax(llvm_syntax: str) -> str:
    """Strip LLVM operand decoration from an instruction syntax.

    Removes every "$" together with up to two directly preceding "#", then
    drops the digits which follow an operand name. The result is logged at
    VERBOSE level and returned.
    """
    without_sigils = re.sub(r"#{0,2}\$", "", llvm_syntax)
    # Any number which stands before a register or immediate letter.
    # NOTE(review): the "," inside the character class makes a comma part of the
    # operand name, so digits directly after a comma (e.g. "Rs,4") are removed
    # as well - presumably "[a-zA-Z]" was meant; confirm before changing.
    normalized = re.sub(r"([A-Z][a-z,A-Z]+)[0-9]+", r"\1", without_sigils)
    message = "Normalized syntax: {} -> {}".format(llvm_syntax, normalized)
    log(message, LogLevel.VERBOSE)
    return normalized
def gen_c_doxygen(desc: str, ret: (str, str) = None, args: [dict] = None) -> str:
    """
    Generates a doxygen doc string and returns it.
    All description strings can contain new lines. The first line of a
    description goes on the line of its doxygen command, every further line
    becomes its own comment line.

    Args:
        desc: The general description (emitted as the brief).
        ret: Return value description with tuple('type', 'desc')
        args: Argument list description. Contains dicts {'name': str, 'desc': str}

    Returns: The doxygen string, one item per line, without a trailing newline.
    """

    def continuation_lines(lines: list) -> str:
        # One " * <text>" comment line per remaining description line.
        return "".join(f" * {line}\n" for line in lines)

    brief, *more = desc.split("\n")
    dox = "/**\n"
    dox += f" * \\brief {brief}\n"
    dox += continuation_lines(more)

    if args:
        for arg in args:
            first, *rest = arg["desc"].split("\n")
            # Terminate the line so the next entry starts on its own line.
            dox += f' * \\param {arg["name"]}: {first}\n'
            dox += continuation_lines(rest)

    if ret:
        first, *rest = ret[1].split("\n")
        # Leading blank and newline keep the return line aligned with the others.
        dox += f" * \\return {ret[0]} {first}\n"
        dox += continuation_lines(rest)

    return dox + " */"