From c98c40227e8cd976a08ff0f6dc386b5d33f62f84 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Sat, 9 Dec 2023 10:03:02 +0000 Subject: [PATCH] gh-112720: Move instruction formatting from the dis.Instruction class to a new class dis.InstructionFormatter. Add the ArgResolver class. (#112722) --- Lib/dis.py | 422 ++++++++++++++++++++++--------------------- Lib/test/test_dis.py | 15 +- 2 files changed, 232 insertions(+), 205 deletions(-) diff --git a/Lib/dis.py b/Lib/dis.py index 8d3885d2526b70..efa935c5a6a0b6 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -336,93 +336,6 @@ class Instruction(_Instruction): covered by this instruction """ - @staticmethod - def _get_argval_argrepr(op, arg, offset, co_consts, names, varname_from_oparg, - labels_map): - get_name = None if names is None else names.__getitem__ - argval = None - argrepr = '' - deop = _deoptop(op) - if arg is not None: - # Set argval to the dereferenced value of the argument when - # available, and argrepr to the string representation of argval. - # _disassemble_bytes needs the string repr of the - # raw name index for LOAD_GLOBAL, LOAD_CONST, etc. - argval = arg - if deop in hasconst: - argval, argrepr = _get_const_info(deop, arg, co_consts) - elif deop in hasname: - if deop == LOAD_GLOBAL: - argval, argrepr = _get_name_info(arg//2, get_name) - if (arg & 1) and argrepr: - argrepr = f"{argrepr} + NULL" - elif deop == LOAD_ATTR: - argval, argrepr = _get_name_info(arg//2, get_name) - if (arg & 1) and argrepr: - argrepr = f"{argrepr} + NULL|self" - elif deop == LOAD_SUPER_ATTR: - argval, argrepr = _get_name_info(arg//4, get_name) - if (arg & 1) and argrepr: - argrepr = f"{argrepr} + NULL|self" - else: - argval, argrepr = _get_name_info(arg, get_name) - elif deop in hasjabs: - argval = arg*2 - argrepr = f"to L{labels_map[argval]}" - elif deop in hasjrel: - signed_arg = -arg if _is_backward_jump(deop) else arg - argval = offset + 2 + signed_arg*2 - caches = _get_cache_size(_all_opname[deop]) - argval += 2 * caches - if deop == ENTER_EXECUTOR: - argval += 2 - argrepr = f"to L{labels_map[argval]}" - elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST): - arg1 = arg >> 4 - arg2 = arg & 15 - val1, argrepr1 = _get_name_info(arg1, varname_from_oparg) - val2, argrepr2 = _get_name_info(arg2, varname_from_oparg) - argrepr = argrepr1 + ", " + argrepr2 - argval = val1, val2 - elif deop in haslocal or deop in hasfree: - argval, argrepr = _get_name_info(arg, varname_from_oparg) - elif deop in hascompare: - argval = cmp_op[arg >> 5] - argrepr = argval - if arg & 16: - argrepr = f"bool({argrepr})" - elif deop == CONVERT_VALUE: - argval = (None, str, repr, ascii)[arg] - argrepr = ('', 'str', 'repr', 'ascii')[arg] - elif deop == SET_FUNCTION_ATTRIBUTE: - argrepr = ', '.join(s for i, s in enumerate(FUNCTION_ATTR_FLAGS) - if arg & (1<' marker arrow as part of the line *offset_width* sets the width of the instruction offset field *label_width* sets the width of the label field + + *line_offset* the line number (within the code unit) """ + self.file = file + self.lineno_width = lineno_width + self.offset_width = offset_width + self.label_width = label_width + + + def print_instruction(self, instr, mark_as_current=False): + """Format instruction details for inclusion in disassembly output.""" + lineno_width = self.lineno_width + offset_width = self.offset_width + label_width = self.label_width + + new_source_line = (lineno_width > 0 and + instr.starts_line and + instr.offset > 0) + if new_source_line: + print(file=self.file) + fields = [] # Column: Source code line number if lineno_width: - if self.starts_line: - lineno_fmt = "%%%dd" if self.line_number is not None else "%%%ds" + if instr.starts_line: + lineno_fmt = "%%%dd" if instr.line_number is not None else "%%%ds" lineno_fmt = lineno_fmt % lineno_width - lineno = self.line_number if self.line_number is not None else '--' + lineno = _NO_LINENO if instr.line_number is None else instr.line_number fields.append(lineno_fmt % lineno) else: fields.append(' ' * lineno_width) # Column: Label - if self.label is not None: - lbl = f"L{self.label}:" + if instr.label is not None: + lbl = f"L{instr.label}:" fields.append(f"{lbl:>{label_width}}") else: fields.append(' ' * label_width) # Column: Instruction offset from start of code sequence if offset_width > 0: - fields.append(f"{repr(self.offset):>{offset_width}} ") + fields.append(f"{repr(instr.offset):>{offset_width}} ") # Column: Current instruction indicator if mark_as_current: fields.append('-->') else: fields.append(' ') # Column: Opcode name - fields.append(self.opname.ljust(_OPNAME_WIDTH)) + fields.append(instr.opname.ljust(_OPNAME_WIDTH)) # Column: Opcode argument - if self.arg is not None: - arg = repr(self.arg) + if instr.arg is not None: + arg = repr(instr.arg) # If opname is longer than _OPNAME_WIDTH, we allow it to overflow into # the space reserved for oparg. This results in fewer misaligned opargs # in the disassembly output. - opname_excess = max(0, len(self.opname) - _OPNAME_WIDTH) - fields.append(repr(self.arg).rjust(_OPARG_WIDTH - opname_excess)) + opname_excess = max(0, len(instr.opname) - _OPNAME_WIDTH) + fields.append(repr(instr.arg).rjust(_OPARG_WIDTH - opname_excess)) # Column: Opcode argument details - if self.argrepr: - fields.append('(' + self.argrepr + ')') - return ' '.join(fields).rstrip() + if instr.argrepr: + fields.append('(' + instr.argrepr + ')') + print(' '.join(fields).rstrip(), file=self.file) + + def print_exception_table(self, exception_entries): + file = self.file + if exception_entries: + print("ExceptionTable:", file=file) + for entry in exception_entries: + lasti = " lasti" if entry.lasti else "" + start = entry.start_label + end = entry.end_label + target = entry.target_label + print(f" L{start} to L{end} -> L{target} [{entry.depth}]{lasti}", file=file) + + +class ArgResolver: + def __init__(self, co_consts, names, varname_from_oparg, labels_map): + self.co_consts = co_consts + self.names = names + self.varname_from_oparg = varname_from_oparg + self.labels_map = labels_map + + def get_argval_argrepr(self, op, arg, offset): + get_name = None if self.names is None else self.names.__getitem__ + argval = None + argrepr = '' + deop = _deoptop(op) + if arg is not None: + # Set argval to the dereferenced value of the argument when + # available, and argrepr to the string representation of argval. + # _disassemble_bytes needs the string repr of the + # raw name index for LOAD_GLOBAL, LOAD_CONST, etc. + argval = arg + if deop in hasconst: + argval, argrepr = _get_const_info(deop, arg, self.co_consts) + elif deop in hasname: + if deop == LOAD_GLOBAL: + argval, argrepr = _get_name_info(arg//2, get_name) + if (arg & 1) and argrepr: + argrepr = f"{argrepr} + NULL" + elif deop == LOAD_ATTR: + argval, argrepr = _get_name_info(arg//2, get_name) + if (arg & 1) and argrepr: + argrepr = f"{argrepr} + NULL|self" + elif deop == LOAD_SUPER_ATTR: + argval, argrepr = _get_name_info(arg//4, get_name) + if (arg & 1) and argrepr: + argrepr = f"{argrepr} + NULL|self" + else: + argval, argrepr = _get_name_info(arg, get_name) + elif deop in hasjabs: + argval = arg*2 + argrepr = f"to L{self.labels_map[argval]}" + elif deop in hasjrel: + signed_arg = -arg if _is_backward_jump(deop) else arg + argval = offset + 2 + signed_arg*2 + caches = _get_cache_size(_all_opname[deop]) + argval += 2 * caches + if deop == ENTER_EXECUTOR: + argval += 2 + argrepr = f"to L{self.labels_map[argval]}" + elif deop in (LOAD_FAST_LOAD_FAST, STORE_FAST_LOAD_FAST, STORE_FAST_STORE_FAST): + arg1 = arg >> 4 + arg2 = arg & 15 + val1, argrepr1 = _get_name_info(arg1, self.varname_from_oparg) + val2, argrepr2 = _get_name_info(arg2, self.varname_from_oparg) + argrepr = argrepr1 + ", " + argrepr2 + argval = val1, val2 + elif deop in haslocal or deop in hasfree: + argval, argrepr = _get_name_info(arg, self.varname_from_oparg) + elif deop in hascompare: + argval = cmp_op[arg >> 5] + argrepr = argval + if arg & 16: + argrepr = f"bool({argrepr})" + elif deop == CONVERT_VALUE: + argval = (None, str, repr, ascii)[arg] + argrepr = ('', 'str', 'repr', 'ascii')[arg] + elif deop == SET_FUNCTION_ATTRIBUTE: + argrepr = ', '.join(s for i, s in enumerate(FUNCTION_ATTR_FLAGS) + if arg & (1< 0 - else: - show_lineno = False - if show_lineno: - maxlineno = max(linestarts_ints) + line_offset - if maxlineno >= 1000: - lineno_width = len(str(maxlineno)) - else: - lineno_width = 3 + offset_width = len(str(max(len(code) - 2, 9999))) if show_offsets else 0 - if lineno_width < len(str(None)) and None in linestarts.values(): - lineno_width = len(str(None)) - else: - lineno_width = 0 - if show_offsets: - maxoffset = len(code) - 2 - if maxoffset >= 10000: - offset_width = len(str(maxoffset)) - else: - offset_width = 4 - else: - offset_width = 0 - - label_width = -1 - for instr in _get_instructions_bytes(code, varname_from_oparg, names, - co_consts, linestarts, - line_offset=line_offset, - exception_entries=exception_entries, - co_positions=co_positions, - show_caches=show_caches, - original_code=original_code): - new_source_line = (show_lineno and - instr.starts_line and - instr.offset > 0) - if new_source_line: - print(file=file) + labels_map = _make_labels_map(original_code or code, exception_entries) + label_width = 4 + len(str(len(labels_map))) + + formatter = Formatter(file=file, + lineno_width=_get_lineno_width(linestarts), + offset_width=offset_width, + label_width=label_width, + line_offset=line_offset) + + arg_resolver = ArgResolver(co_consts, names, varname_from_oparg, labels_map) + instrs = _get_instructions_bytes(code, linestarts=linestarts, + line_offset=line_offset, + co_positions=co_positions, + show_caches=show_caches, + original_code=original_code, + labels_map=labels_map, + arg_resolver=arg_resolver) + + print_instructions(instrs, exception_entries, formatter, + show_caches=show_caches, lasti=lasti) + + +def print_instructions(instrs, exception_entries, formatter, show_caches=False, lasti=-1): + for instr in instrs: if show_caches: is_current_instr = instr.offset == lasti else: # Each CACHE takes 2 bytes is_current_instr = instr.offset <= lasti \ <= instr.offset + 2 * _get_cache_size(_all_opname[_deoptop(instr.opcode)]) - label_width = getattr(instr, 'label_width', label_width) - assert label_width >= 0 - print(instr._disassemble(lineno_width, is_current_instr, offset_width, label_width), - file=file) - if exception_entries: - print("ExceptionTable:", file=file) - for entry in exception_entries: - lasti = " lasti" if entry.lasti else "" - start = entry.start_label - end = entry.end_label - target = entry.target_label - print(f" L{start} to L{end} -> L{target} [{entry.depth}]{lasti}", file=file) + formatter.print_instruction(instr, is_current_instr) + formatter.print_exception_table(exception_entries) def _disassemble_str(source, **kwargs): """Compile the source string, then disassemble the code object.""" @@ -927,15 +944,18 @@ def __init__(self, x, *, first_line=None, current_offset=None, show_caches=False def __iter__(self): co = self.codeobj + original_code = co.co_code + labels_map = _make_labels_map(original_code, self.exception_entries) + arg_resolver = ArgResolver(co.co_consts, co.co_names, co._varname_from_oparg, + labels_map) return _get_instructions_bytes(_get_code_array(co, self.adaptive), - co._varname_from_oparg, - co.co_names, co.co_consts, - self._linestarts, + linestarts=self._linestarts, line_offset=self._line_offset, - exception_entries=self.exception_entries, co_positions=co.co_positions(), show_caches=self.show_caches, - original_code=co.co_code) + original_code=original_code, + labels_map=labels_map, + arg_resolver=arg_resolver) def __repr__(self): return "{}({!r})".format(self.__class__.__name__, diff --git a/Lib/test/test_dis.py b/Lib/test/test_dis.py index 349790ecd7d075..0ea4dc4566a4a4 100644 --- a/Lib/test/test_dis.py +++ b/Lib/test/test_dis.py @@ -1785,6 +1785,12 @@ def __init__(self, *args): super().__init__(*args) self.maxDiff = None + def test_instruction_str(self): + # smoke test for __str__ + instrs = dis.get_instructions(simple) + for instr in instrs: + str(instr) + def test_default_first_line(self): actual = dis.get_instructions(simple) self.assertInstructionsEqual(list(actual), expected_opinfo_simple) @@ -1955,15 +1961,16 @@ def test_jump_target(self): self.assertEqual(10 + 2 + 1*2 + 100*2, instruction.jump_target) def test_argval_argrepr(self): - def f(*args): - return dis.Instruction._get_argval_argrepr( - *args, labels_map={24: 1}) + def f(opcode, oparg, offset, *init_args): + arg_resolver = dis.ArgResolver(*init_args) + return arg_resolver.get_argval_argrepr(opcode, oparg, offset) offset = 42 co_consts = (0, 1, 2, 3) names = {1: 'a', 2: 'b'} varname_from_oparg = lambda i : names[i] - args = (offset, co_consts, names, varname_from_oparg) + labels_map = {24: 1} + args = (offset, co_consts, names, varname_from_oparg, labels_map) self.assertEqual(f(opcode.opmap["POP_TOP"], None, *args), (None, '')) self.assertEqual(f(opcode.opmap["LOAD_CONST"], 1, *args), (1, '1')) self.assertEqual(f(opcode.opmap["LOAD_GLOBAL"], 2, *args), ('a', 'a'))