From fc49910aae85150c768f6c44928a5efe6e34daca Mon Sep 17 00:00:00 2001 From: Gillou68310 Date: Mon, 10 Feb 2025 09:20:22 +0100 Subject: [PATCH 1/4] Rework https://github.com/simonlindholm/asm-differ/commit/ea176989620cef521198071a1aa56c7ede86be5f using pre_process --- diff.py | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/diff.py b/diff.py index 2b8f13e..ae24ce0 100755 --- a/diff.py +++ b/diff.py @@ -2032,6 +2032,24 @@ def _normalize_adrp_differences(self, mnemonic: str, row: str) -> str: class AsmProcessorX86(AsmProcessor): + def pre_process( + self, mnemonic: str, args: str, next_row: Optional[str], comment: Optional[str] + ) -> Tuple[str, str]: + if ( + comment is not None + and ( + next_row is None + or re.search(self.config.arch.re_reloc, next_row) is None + ) + and mnemonic == "call" + ): + # if the mnemonic is call and the comment doesn't match + # <.text+0x...> replace the args with the contents of the comment + if re.search(r"<.+\+0x[0-9a-fA-F]+>", comment) is None: + args = comment[1:-1] + + return mnemonic, args + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: if "WRTSEG" in row: # ignore WRTSEG (watcom) return prev, None @@ -2049,7 +2067,7 @@ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: # Example call a2f # Example call *0 # Example jmp 64 - elif mnemonic in X86_BRANCH_INSTRUCTIONS: + elif mnemonic in X86_BRANCH_INSTRUCTIONS or "call" in mnemonic: addr_imm = re.search(r"(^|(?<=\*)|(?<=\*\%cs\:))[0-9a-f]+", args) # Direct use of reloc @@ -2351,7 +2369,6 @@ class ArchSettings: } X86_BRANCH_INSTRUCTIONS = { - "call", "jmp", "ljmp", "ja", @@ -2930,14 +2947,6 @@ def process(dump: str, config: Config) -> List[Line]: break i += 1 - # Example call 0 - if arch.name is "x86" and mnemonic == "call" and comment and symbol is None: - addr_imm = re.search(r"(?:0x)?0+$", original) - if addr_imm is not None: - start, end = addr_imm.span() - symbol = comment[1 : len(comment) - 1] - original = original[:start] + symbol - is_text_relative_j = False if ( arch.name in MIPS_ARCH_NAMES From 7802ff10869e1e48aed5c009e53b9023f66bbe09 Mon Sep 17 00:00:00 2001 From: Gillou68310 Date: Mon, 10 Feb 2025 09:54:53 +0100 Subject: [PATCH 2/4] Optional 0x prefix --- diff.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/diff.py b/diff.py index ae24ce0..9e34a7a 100755 --- a/diff.py +++ b/diff.py @@ -2080,14 +2080,14 @@ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: # Example movb $0x0,0x0(%si) if not addr_imm: - addr_imm = re.search(r"(?<=,)0x0+(?=\(.*\))", args) + addr_imm = re.search(r"(?<=,)(?:0x)?0+(?=\(.*\))", args) # Example 0x0,0x8(%edi) # Example 0x0,%edi # Example *0x0(,%edx,4) # Example $0x0,0x4(%edi) if not addr_imm: - addr_imm = re.search(r"(^\$?|(?<=\*))0x0", args) + addr_imm = re.search(r"(^\$?|(?<=\*))(?:0x)?0", args) # Offset value @@ -2099,17 +2099,19 @@ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: # Example movb $0x0,0x4(%si) if not addr_imm: - addr_imm = re.search(r"(?<=,)0x[0-9a-f]+", args) + addr_imm = re.search(r"(?<=,)(?:0x)?[0-9a-f]+", args) offset = True # Example 0x4,%eax # Example $0x4,%eax if not addr_imm: - addr_imm = re.search(r"(^|(?<=\*)|(?:\$))0x[0-9a-f]+", args) + addr_imm = re.search(r"(^|(?<=\*)|(?:\$))(?:0x)?[0-9a-f]+", args) offset = True if not addr_imm: - addr_imm = re.search(r"(^|(?<=\*)|(?<=\%[fgdecs]s\:))0x[0-9a-f]+", args) + addr_imm = re.search( + r"(^|(?<=\*)|(?<=\%[fgdecs]s\:))(?:0x)?[0-9a-f]+", args + ) offset = True if not addr_imm: From b8911c7ce5066d8faf6b5b65a36c4c03c2ea8108 Mon Sep 17 00:00:00 2001 From: Gillou68310 Date: Tue, 11 Feb 2025 09:58:35 +0100 Subject: [PATCH 3/4] More fixes --- diff.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/diff.py b/diff.py index 9e34a7a..9ce120f 100755 --- a/diff.py +++ b/diff.py @@ -2087,7 +2087,7 @@ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: # Example *0x0(,%edx,4) # Example $0x0,0x4(%edi) if not addr_imm: - addr_imm = re.search(r"(^\$?|(?<=\*))(?:0x)?0", args) + addr_imm = re.search(r"(^\$?|(?<=\*))(?:0x)?0(?!x)", args) # Offset value @@ -2611,7 +2611,7 @@ class ArchSettings: # This destroys the objdump output processor logic, so we avoid this. arch_flags=["--no-show-raw-insn"], branch_instructions=X86_BRANCH_INSTRUCTIONS, - instructions_with_address_immediates=X86_BRANCH_INSTRUCTIONS.union({"mov"}), + instructions_with_address_immediates=X86_BRANCH_INSTRUCTIONS.union({"mov", "call"}), proc=AsmProcessorX86, ) From 30450363960825f84129c5ad702ee1903327672e Mon Sep 17 00:00:00 2001 From: Gillou68310 Date: Tue, 11 Feb 2025 10:12:10 +0100 Subject: [PATCH 4/4] Remove leading $ in offsets --- diff.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/diff.py b/diff.py index 9ce120f..09610f8 100755 --- a/diff.py +++ b/diff.py @@ -2169,7 +2169,10 @@ def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: assert False, f"unknown relocation type '{row}' for line '{prev}'" if offset: - repl = f"{repl}+{addr_imm.group()}" + of = addr_imm.group() + if of[0] == "$": + of = of[1:] + repl = f"{repl}+{of}" return f"{mnemonic}\t{args[:start]+repl+args[end:]}", repl