diff --git a/asm_differ/__init__.py b/asm_differ/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/asm_differ/diff.py b/asm_differ/diff.py new file mode 100755 index 0000000..ca34574 --- /dev/null +++ b/asm_differ/diff.py @@ -0,0 +1,4014 @@ +#!/usr/bin/env python3 +# PYTHON_ARGCOMPLETE_OK +import argparse +import enum +import sys +from typing import ( + Any, + Callable, + Dict, + Iterator, + List, + Sequence, + Match, + NoReturn, + Optional, + Pattern, + Set, + Tuple, + Type, + Union, +) + + +def fail(msg: str) -> NoReturn: + print(msg, file=sys.stderr) + sys.exit(1) + + +def static_assert_unreachable(x: NoReturn) -> NoReturn: + raise Exception("Unreachable! " + repr(x)) + + +# The encoding to use for reading the map file to text. +# It is expected the map file will be ASCII-only or mostly, +# which UTF-8 is compatible with. +MAPFILE_ENCODING = "UTF-8" +# In case the map file isn't UTF-8 and some bytes can't be decoded as UTF-8, +# don't error and subtitute with U+FFFD. +# (cf the Python documentation on `codecs.replace_errors`) +MAPFILE_ENCODING_ERROR_HANDLER = "replace" + + +class DiffMode(enum.Enum): + SINGLE = "single" + SINGLE_BASE = "single_base" + NORMAL = "normal" + THREEWAY_PREV = "3prev" + THREEWAY_BASE = "3base" + + +# ==== COMMAND-LINE ==== + +parser = argparse.ArgumentParser( + description="Diff MIPS, PPC, AArch64, ARM32, SH2, SH4, or m68k assembly." +) +start_argument = parser.add_argument( + "start", + help="Function name or address to start diffing from.", +) +parser.add_argument( + "end", + nargs="?", + help="Address to end diff at.", +) +parser.add_argument( + "-o", + dest="diff_obj", + action="store_true", + help="""Diff .o files rather than a whole binary. This makes it possible to + see symbol names. (Recommended)""", +) +parser.add_argument( + "-f", + "--file", + dest="file", + type=str, + help="""File path for a file being diffed. When used the map + file isn't searched for the function given. Useful for dynamically + linked libraries.""", +) +parser.add_argument( + "-e", + "--elf", + dest="diff_elf_symbol", + metavar="SYMBOL", + help="""Diff a given function in two ELFs, one being stripped and the other + one non-stripped. Requires objdump from binutils 2.33+.""", +) +parser.add_argument( + "-c", + "--source", + dest="show_source", + action="store_true", + help="Show source code (if possible). Only works with -o or -e.", +) +parser.add_argument( + "-C", + "--source-old-binutils", + dest="source_old_binutils", + action="store_true", + help="""Tweak --source handling to make it work with binutils < 2.33. + Implies --source.""", +) +parser.add_argument( + "-j", + "--section", + dest="diff_section", + default=".text", + metavar="SECTION", + help="Diff restricted to a given output section.", +) +parser.add_argument( + "-L", + "--line-numbers", + dest="show_line_numbers", + action="store_const", + const=True, + help="""Show source line numbers in output, when available. May be enabled by + default depending on diff_settings.py.""", +) +parser.add_argument( + "--no-line-numbers", + dest="show_line_numbers", + action="store_const", + const=False, + help="Hide source line numbers in output.", +) +parser.add_argument( + "--inlines", + dest="inlines", + action="store_true", + help="Show inline function calls (if possible). 
Only works with -o or -e.", +) +parser.add_argument( + "--base-asm", + dest="base_asm", + metavar="FILE", + help="Read assembly from given file instead of configured base img.", +) +parser.add_argument( + "--write-asm", + dest="write_asm", + metavar="FILE", + help="Write the current assembly output to file, e.g. for use with --base-asm.", +) +parser.add_argument( + "-m", + "--make", + dest="make", + action="store_true", + help="Automatically run 'make' on the .o file or binary before diffing.", +) +parser.add_argument( + "-l", + "--skip-lines", + dest="skip_lines", + metavar="LINES", + type=int, + default=0, + help="Skip the first LINES lines of output.", +) +parser.add_argument( + "-s", + "--stop-at-ret", + dest="stop_at_ret", + action="count", + help="""Stop disassembling at the first return instruction. + You can also pass -ss to stop at the second return instruction, and so on.""", +) +parser.add_argument( + "-i", + "--ignore-large-imms", + dest="ignore_large_imms", + action="store_true", + help="Pretend all large enough immediates are the same.", +) +parser.add_argument( + "-I", + "--ignore-addr-diffs", + dest="ignore_addr_diffs", + action="store_true", + help="Ignore address differences. Currently only affects AArch64 and ARM32.", +) +parser.add_argument( + "-B", + "--no-show-branches", + dest="show_branches", + action="store_false", + help="Don't visualize branches/branch targets.", +) +parser.add_argument( + "-R", + "--no-show-rodata-refs", + dest="show_rodata_refs", + action="store_false", + help="Don't show .rodata -> .text references (typically from jump tables).", +) +parser.add_argument( + "-S", + "--base-shift", + dest="base_shift", + metavar="N", + type=str, + default="0", + help="""Diff position N in our img against position N + shift in the base img. + Arithmetic is allowed, so e.g. |-S "0x1234 - 0x4321"| is a reasonable + flag to pass if it is known that position 0x1234 in the base img syncs + up with position 0x4321 in our img. Not supported together with -o.""", +) +parser.add_argument( + "-w", + "--watch", + dest="watch", + action="store_true", + help="""Automatically update when source/object files change. + Recommended in combination with -m.""", +) +parser.add_argument( + "-y", + "--yes", + dest="agree", + action="store_true", + help="""Automatically agree to any yes/no questions asked. + Useful if you really want to use the -w option without -m.""", +) +parser.add_argument( + "-0", + "--diff_mode=single_base", + dest="diff_mode", + action="store_const", + const=DiffMode.SINGLE_BASE, + help="""View the base asm only (not a diff).""", +) +parser.add_argument( + "-1", + "--diff_mode=single", + dest="diff_mode", + action="store_const", + const=DiffMode.SINGLE, + help="""View the current asm only (not a diff).""", +) +parser.add_argument( + "-3", + "--threeway=prev", + dest="diff_mode", + action="store_const", + const=DiffMode.THREEWAY_PREV, + help="""Show a three-way diff between target asm, current asm, and asm + prior to -w rebuild. Requires -w.""", +) +parser.add_argument( + "-b", + "--threeway=base", + dest="diff_mode", + action="store_const", + const=DiffMode.THREEWAY_BASE, + help="""Show a three-way diff between target asm, current asm, and asm + when diff.py was started. 
Requires -w.""", +) +parser.add_argument( + "--width", + dest="column_width", + metavar="COLS", + type=int, + default=50, + help="Sets the width of the left and right view column.", +) +parser.add_argument( + "--algorithm", + dest="algorithm", + default="levenshtein", + choices=["levenshtein", "difflib"], + help="""Diff algorithm to use. Levenshtein gives the minimum diff, while difflib + aims for long sections of equal opcodes. Defaults to %(default)s.""", +) +parser.add_argument( + "--max-size", + "--max-lines", + metavar="LINES", + dest="max_lines", + type=int, + default=1024, + help="The maximum length of the diff, in lines.", +) +parser.add_argument( + "--no-pager", + dest="no_pager", + action="store_true", + help="""Disable the pager; write output directly to stdout, then exit. + Incompatible with --watch.""", +) +parser.add_argument( + "--format", + choices=("color", "plain", "html", "json"), + default="color", + help="Output format, default is color. --format=html or json implies --no-pager.", +) +parser.add_argument( + "-U", + "--compress-matching", + metavar="N", + dest="compress_matching", + type=int, + help="""Compress streaks of matching lines, leaving N lines of context + around non-matching parts.""", +) +parser.add_argument( + "-V", + "--compress-sameinstr", + metavar="N", + dest="compress_sameinstr", + type=int, + help="""Compress streaks of lines with same instructions (but possibly + different regalloc), leaving N lines of context around other parts.""", +) +parser.add_argument( + "-d", + "--diff-function-symbols", + dest="diff_function_symbols", + action="store_true", + help="Include and diff function symbols.", +) + +# ==== IMPORTS ==== + +# (We do imports late to optimize auto-complete performance.) + +import abc +from collections import Counter, defaultdict +from dataclasses import asdict, dataclass, field, replace +import difflib +import html +import itertools +import json +import os +import queue +import re +import string +import struct +import subprocess +import threading +import time +import traceback + + +MISSING_PREREQUISITES = ( + "Missing prerequisite python module {}. " + "Run `python3 -m pip install --user colorama watchdog levenshtein cxxfilt` to install prerequisites (cxxfilt only needed with --source)." 
+) + +try: + from colorama import Back, Fore, Style + import watchdog +except ModuleNotFoundError as e: + fail(MISSING_PREREQUISITES.format(e.name)) + +# ==== CONFIG ==== + + +@dataclass +class ProjectSettings: + arch_str: str + objdump_executable: str + objdump_flags: List[str] + build_command: List[str] + map_format: str + build_dir: str + map_address_offset: int + baseimg: Optional[str] + myimg: Optional[str] + mapfile: Optional[str] + source_directories: Optional[List[str]] + source_extensions: List[str] + show_line_numbers_default: bool + disassemble_all: bool + reg_categories: Dict[str, int] + expected_dir: str + + +@dataclass +class Compress: + context: int + same_instr: bool + + +@dataclass +class Config: + arch: "ArchSettings" + + # Build/objdump options + diff_obj: bool + file: Optional[str] + make: bool + source_old_binutils: bool + diff_section: str + inlines: bool + max_function_size_lines: int + max_function_size_bytes: int + + # Display options + formatter: "Formatter" + diff_mode: DiffMode + base_shift: int + skip_lines: int + compress: Optional[Compress] + show_rodata_refs: bool + show_branches: bool + show_line_numbers: bool + show_source: bool + stop_at_ret: Optional[int] + ignore_large_imms: bool + ignore_addr_diffs: bool + algorithm: str + reg_categories: Dict[str, int] + diff_function_symbols: bool + + # Score options + score_stack_differences = True + penalty_stackdiff = 1 + penalty_regalloc = 5 + penalty_reordering = 60 + penalty_insertion = 100 + penalty_deletion = 100 + + +def create_project_settings(settings: Dict[str, Any]) -> ProjectSettings: + return ProjectSettings( + arch_str=settings.get("arch", "mips"), + baseimg=settings.get("baseimg"), + myimg=settings.get("myimg"), + mapfile=settings.get("mapfile"), + build_command=settings.get( + "make_command", ["make", *settings.get("makeflags", [])] + ), + source_directories=settings.get("source_directories"), + source_extensions=settings.get( + "source_extensions", [".c", ".h", ".cpp", ".hpp", ".s"] + ), + objdump_executable=get_objdump_executable(settings.get("objdump_executable")), + objdump_flags=settings.get("objdump_flags", []), + expected_dir=settings.get("expected_dir", "expected/"), + map_format=settings.get("map_format", "gnu"), + map_address_offset=settings.get( + "map_address_offset", settings.get("ms_map_address_offset", 0) + ), + build_dir=settings.get("build_dir", settings.get("mw_build_dir", "build/")), + show_line_numbers_default=settings.get("show_line_numbers_default", True), + disassemble_all=settings.get("disassemble_all", False), + reg_categories=settings.get("reg_categories", {}), + ) + + +def create_config(args: argparse.Namespace, project: ProjectSettings) -> Config: + arch = get_arch(project.arch_str) + + formatter: Formatter + if args.format == "plain": + formatter = PlainFormatter(column_width=args.column_width) + elif args.format == "color": + formatter = AnsiFormatter(column_width=args.column_width) + elif args.format == "html": + formatter = HtmlFormatter() + elif args.format == "json": + formatter = JsonFormatter(arch_str=arch.name) + else: + raise ValueError(f"Unsupported --format: {args.format}") + + compress = None + if args.compress_matching is not None: + compress = Compress(args.compress_matching, False) + if args.compress_sameinstr is not None: + if compress is not None: + raise ValueError( + "Cannot pass both --compress-matching and --compress-sameinstr" + ) + compress = Compress(args.compress_sameinstr, True) + + show_line_numbers = args.show_line_numbers + if 
show_line_numbers is None: + show_line_numbers = project.show_line_numbers_default + + return Config( + arch=arch, + # Build/objdump options + diff_obj=args.diff_obj, + file=args.file, + make=args.make, + source_old_binutils=args.source_old_binutils + or "llvm-" in project.objdump_executable, + diff_section=args.diff_section, + inlines=args.inlines, + max_function_size_lines=args.max_lines, + max_function_size_bytes=args.max_lines * 4, + # Display options + formatter=formatter, + diff_mode=args.diff_mode or DiffMode.NORMAL, + base_shift=eval_int( + args.base_shift, "Failed to parse --base-shift (-S) argument as an integer." + ), + skip_lines=args.skip_lines, + compress=compress, + show_rodata_refs=args.show_rodata_refs, + show_branches=args.show_branches, + show_line_numbers=show_line_numbers, + show_source=args.show_source or args.source_old_binutils, + stop_at_ret=args.stop_at_ret, + ignore_large_imms=args.ignore_large_imms, + ignore_addr_diffs=args.ignore_addr_diffs, + algorithm=args.algorithm, + reg_categories=project.reg_categories, + diff_function_symbols=args.diff_function_symbols, + ) + + +def get_objdump_executable(objdump_executable: Optional[str]) -> str: + if objdump_executable is not None: + return objdump_executable + + objdump_candidates = [ + "mips-linux-gnu-objdump", + "mips64-elf-objdump", + "mips-elf-objdump", + "sh-elf-objdump", + "sh4-linux-gnu-objdump", + "m68k-elf-objdump", + ] + for objdump_cand in objdump_candidates: + try: + subprocess.check_call( + [objdump_cand, "--version"], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ) + return objdump_cand + except subprocess.CalledProcessError: + pass + except FileNotFoundError: + pass + + return fail( + f"Missing binutils; please ensure {' or '.join(objdump_candidates)} exists, or configure objdump_executable." + ) + + +def get_arch(arch_str: str) -> "ArchSettings": + for settings in ARCH_SETTINGS: + if arch_str == settings.name: + return settings + raise ValueError(f"Unknown architecture: {arch_str}") + + +BUFFER_CMD: List[str] = ["tail", "-c", str(10**9)] + +# -S truncates long lines instead of wrapping them +# -R interprets color escape sequences +# -i ignores case when searching +# -c something about how the screen gets redrawn; I don't remember the purpose +# -#6 makes left/right arrow keys scroll by 6 characters +LESS_CMD: List[str] = ["less", "-SRic", "-+F", "-+X", "-#6"] + +DEBOUNCE_DELAY: float = 0.1 + +# ==== FORMATTING ==== + + +@enum.unique +class BasicFormat(enum.Enum): + NONE = enum.auto() + IMMEDIATE = enum.auto() + STACK = enum.auto() + REGISTER = enum.auto() + REGISTER_CATEGORY = enum.auto() + DELAY_SLOT = enum.auto() + DIFF_CHANGE = enum.auto() + DIFF_ADD = enum.auto() + DIFF_REMOVE = enum.auto() + SOURCE_FILENAME = enum.auto() + SOURCE_FUNCTION = enum.auto() + SOURCE_LINE_NUM = enum.auto() + SOURCE_OTHER = enum.auto() + + +@dataclass(frozen=True) +class RotationFormat: + group: str + index: int + key: str + + +Format = Union[BasicFormat, RotationFormat] +FormatFunction = Callable[[str], Format] + + +class Text: + segments: List[Tuple[str, Format]] + + def __init__(self, line: str = "", f: Format = BasicFormat.NONE) -> None: + self.segments = [(line, f)] if line else [] + + def reformat(self, f: Format) -> "Text": + return Text(self.plain(), f) + + def plain(self) -> str: + return "".join(s for s, f in self.segments) + + def __repr__(self) -> str: + return f"" + + def __bool__(self) -> bool: + return any(s for s, f in self.segments) + + def __str__(self) -> str: + # Use Formatter.apply(...) 
instead + return NotImplemented + + def __eq__(self, other: object) -> bool: + return NotImplemented + + def __add__(self, other: Union["Text", str]) -> "Text": + if isinstance(other, str): + other = Text(other) + result = Text() + # If two adjacent segments have the same format, merge their lines + if ( + self.segments + and other.segments + and self.segments[-1][1] == other.segments[0][1] + ): + result.segments = ( + self.segments[:-1] + + [(self.segments[-1][0] + other.segments[0][0], self.segments[-1][1])] + + other.segments[1:] + ) + else: + result.segments = self.segments + other.segments + return result + + def __radd__(self, other: Union["Text", str]) -> "Text": + if isinstance(other, str): + other = Text(other) + return other + self + + def finditer(self, pat: Pattern[str]) -> Iterator[Match[str]]: + """Replacement for `pat.finditer(text)` that operates on the inner text, + and returns the exact same matches as `Text.sub(pat, ...)`.""" + for chunk, f in self.segments: + for match in pat.finditer(chunk): + yield match + + def sub(self, pat: Pattern[str], sub_fn: Callable[[Match[str]], "Text"]) -> "Text": + result = Text() + for chunk, f in self.segments: + i = 0 + for match in pat.finditer(chunk): + start, end = match.start(), match.end() + assert i <= start <= end <= len(chunk) + sub = sub_fn(match) + if i != start: + result.segments.append((chunk[i:start], f)) + result.segments.extend(sub.segments) + i = end + if chunk[i:]: + result.segments.append((chunk[i:], f)) + return result + + def ljust(self, column_width: int) -> "Text": + length = sum(len(x) for x, _ in self.segments) + return self + " " * max(column_width - length, 0) + + +@dataclass +class TableLine: + key: Optional[str] + is_data_ref: bool + cells: Tuple[Tuple[Text, Optional["Line"]], ...] + + +@dataclass +class TableData: + headers: Tuple[Text, ...] + current_score: int + max_score: int + previous_score: Optional[int] + lines: List[TableLine] + + +class Formatter(abc.ABC): + @abc.abstractmethod + def apply_format(self, chunk: str, f: Format) -> str: + """Apply the formatting `f` to `chunk` and escape the contents.""" + ... + + @abc.abstractmethod + def table(self, data: TableData) -> str: + """Format a multi-column table with metadata""" + ... + + def apply(self, text: Text) -> str: + return "".join(self.apply_format(chunk, f) for chunk, f in text.segments) + + @staticmethod + def outputline_texts(line: TableLine) -> Tuple[Text, ...]: + return tuple(cell[0] for cell in line.cells) + + +@dataclass +class PlainFormatter(Formatter): + column_width: int + + def apply_format(self, chunk: str, f: Format) -> str: + return chunk + + def table(self, data: TableData) -> str: + rows = [data.headers] + [self.outputline_texts(line) for line in data.lines] + return "\n".join( + "".join(self.apply(x.ljust(self.column_width)) for x in row) for row in rows + ) + + +@dataclass +class AnsiFormatter(Formatter): + # Additional ansi escape codes not in colorama. 
See: + # https://en.wikipedia.org/wiki/ANSI_escape_code#SGR_(Select_Graphic_Rendition)_parameters + STYLE_UNDERLINE = "\x1b[4m" + STYLE_NO_UNDERLINE = "\x1b[24m" + STYLE_INVERT = "\x1b[7m" + STYLE_RESET = "\x1b[0m" + + BASIC_ANSI_CODES = { + BasicFormat.NONE: "", + BasicFormat.IMMEDIATE: Fore.LIGHTBLUE_EX, + BasicFormat.STACK: Fore.YELLOW, + BasicFormat.REGISTER: Fore.YELLOW, + BasicFormat.REGISTER_CATEGORY: Fore.LIGHTYELLOW_EX, + BasicFormat.DIFF_CHANGE: Fore.LIGHTBLUE_EX, + BasicFormat.DIFF_ADD: Fore.GREEN, + BasicFormat.DIFF_REMOVE: Fore.RED, + BasicFormat.SOURCE_FILENAME: Style.DIM + Style.BRIGHT, + BasicFormat.SOURCE_FUNCTION: Style.DIM + Style.BRIGHT + STYLE_UNDERLINE, + BasicFormat.SOURCE_LINE_NUM: Fore.LIGHTBLACK_EX, + BasicFormat.SOURCE_OTHER: Style.DIM, + } + + BASIC_ANSI_CODES_UNDO = { + BasicFormat.NONE: "", + BasicFormat.SOURCE_FILENAME: Style.NORMAL, + BasicFormat.SOURCE_FUNCTION: Style.NORMAL + STYLE_NO_UNDERLINE, + BasicFormat.SOURCE_OTHER: Style.NORMAL, + } + + ROTATION_ANSI_COLORS = [ + Fore.MAGENTA, + Fore.CYAN, + Fore.GREEN, + Fore.RED, + Fore.LIGHTYELLOW_EX, + Fore.LIGHTMAGENTA_EX, + Fore.LIGHTCYAN_EX, + Fore.LIGHTGREEN_EX, + Fore.LIGHTBLACK_EX, + ] + + column_width: int + + def apply_format(self, chunk: str, f: Format) -> str: + if f == BasicFormat.NONE: + return chunk + undo_ansi_code = Fore.RESET + if isinstance(f, BasicFormat): + ansi_code = self.BASIC_ANSI_CODES[f] + undo_ansi_code = self.BASIC_ANSI_CODES_UNDO.get(f, undo_ansi_code) + elif isinstance(f, RotationFormat): + ansi_code = self.ROTATION_ANSI_COLORS[ + f.index % len(self.ROTATION_ANSI_COLORS) + ] + else: + static_assert_unreachable(f) + return f"{ansi_code}{chunk}{undo_ansi_code}" + + def table(self, data: TableData) -> str: + rows = [(data.headers, False)] + [ + ( + self.outputline_texts(line), + line.is_data_ref, + ) + for line in data.lines + ] + return "\n".join( + "".join( + (self.STYLE_INVERT if is_data_ref else "") + + self.apply(x.ljust(self.column_width)) + + (self.STYLE_RESET if is_data_ref else "") + for x in row + ) + for (row, is_data_ref) in rows + ) + + +@dataclass +class HtmlFormatter(Formatter): + rotation_formats: int = 9 + + def apply_format(self, chunk: str, f: Format) -> str: + chunk = html.escape(chunk) + if f == BasicFormat.NONE: + return chunk + if isinstance(f, BasicFormat): + class_name = f.name.lower().replace("_", "-") + data_attr = "" + elif isinstance(f, RotationFormat): + class_name = f"rotation-{f.index % self.rotation_formats}" + rotation_key = html.escape(f"{f.group};{f.key}", quote=True) + data_attr = f'data-rotation="{rotation_key}"' + else: + static_assert_unreachable(f) + return f"{chunk}" + + def table(self, data: TableData) -> str: + def table_row(line: Tuple[Text, ...], is_data_ref: bool, cell_el: str) -> str: + tr_attrs = " class='data-ref'" if is_data_ref else "" + output_row = f" " + for cell in line: + cell_html = self.apply(cell) + output_row += f"<{cell_el}>{cell_html}" + output_row += "\n" + return output_row + + output = "\n" + output += " \n" + output += table_row(data.headers, False, "th") + output += " \n" + output += " \n" + output += "".join( + table_row(self.outputline_texts(line), line.is_data_ref, "td") + for line in data.lines + ) + output += " \n" + output += "
\n" + return output + + +@dataclass +class PythonFormatter(Formatter): + arch_str: str + + def apply_format(self, chunk: str, f: Format) -> str: + # This method is unused by this formatter + return NotImplemented + + def table(self, data: TableData) -> str: + # This method is unused by this formatter + return NotImplemented + + def raw(self, data: TableData) -> Dict[str, Any]: + def serialize_format(s: str, f: Format) -> Dict[str, Any]: + if f == BasicFormat.NONE: + return {"text": s} + elif isinstance(f, BasicFormat): + return {"text": s, "format": f.name.lower()} + elif isinstance(f, RotationFormat): + attrs = asdict(f) + attrs.update({"text": s, "format": "rotation"}) + return attrs + else: + static_assert_unreachable(f) + + def serialize(text: Optional[Text]) -> List[Dict[str, Any]]: + if text is None: + return [] + return [serialize_format(s, f) for s, f in text.segments] + + output: Dict[str, Any] = {} + output["arch_str"] = self.arch_str + output["header"] = { + name: serialize(h) + for h, name in zip(data.headers, ("base", "current", "previous")) + } + output["current_score"] = data.current_score + output["max_score"] = data.max_score + if data.previous_score is not None: + output["previous_score"] = data.previous_score + output_rows: List[Dict[str, Any]] = [] + for row in data.lines: + output_row: Dict[str, Any] = {} + output_row["key"] = row.key + output_row["is_data_ref"] = row.is_data_ref + iters: List[Tuple[str, Text, Optional[Line]]] = [ + (label, *cell) + for label, cell in zip(("base", "current", "previous"), row.cells) + ] + if all(line is None for _, _, line in iters): + # Skip rows that were only for displaying source code + continue + for column_name, text, line in iters: + column: Dict[str, Any] = {} + column["text"] = serialize(text) + if line: + if line.mnemonic is not None: + column["mnemonic"] = line.mnemonic + if line.symbol is not None: + column["symbol"] = line.symbol + if line.line_num is not None: + column["line"] = line.line_num + if line.branch_target is not None: + column["branch"] = line.branch_target + if line.source_lines: + column["src"] = line.source_lines + if line.comment is not None: + column["src_comment"] = line.comment + if line.source_line_num is not None: + column["src_line"] = line.source_line_num + if line or column["text"]: + output_row[column_name] = column + output_rows.append(output_row) + output["rows"] = output_rows + return output + + +@dataclass +class JsonFormatter(PythonFormatter): + def table(self, data: TableData) -> str: + output = super().raw(data) + return json.dumps(output) + + +def format_fields( + pat: Pattern[str], + out1: Text, + out2: Text, + color1: FormatFunction, + color2: Optional[FormatFunction] = None, +) -> Tuple[Text, Text]: + diffs = [ + of.group() != nf.group() + for (of, nf) in zip(out1.finditer(pat), out2.finditer(pat)) + ] + + it = iter(diffs) + + def maybe_color(color: FormatFunction, s: str) -> Text: + return Text(s, color(s)) if next(it, False) else Text(s) + + out1 = out1.sub(pat, lambda m: maybe_color(color1, m.group())) + it = iter(diffs) + out2 = out2.sub(pat, lambda m: maybe_color(color2 or color1, m.group())) + + return out1, out2 + + +def symbol_formatter(group: str, base_index: int) -> FormatFunction: + symbol_formats: Dict[str, Format] = {} + + def symbol_format(s: str) -> Format: + # TODO: it would be nice to use a unique Format for each symbol, so we could + # add extra UI elements in the HTML version + f = symbol_formats.get(s) + if f is None: + index = len(symbol_formats) + base_index + f = 
RotationFormat(key=s, index=index, group=group) + symbol_formats[s] = f + return f + + return symbol_format + + +# ==== LOGIC ==== + +ObjdumpCommand = Tuple[List[str], str, Optional[str]] + +# eval_expr adapted from https://stackoverflow.com/a/9558001 + +import ast +import operator as op + +operators: Dict[Type[Union[ast.operator, ast.unaryop]], Any] = { + ast.Add: op.add, + ast.Sub: op.sub, + ast.Mult: op.mul, + ast.Div: op.floordiv, + ast.USub: op.neg, + ast.Pow: op.pow, + ast.BitXor: op.xor, + ast.BitOr: op.or_, + ast.BitAnd: op.and_, + ast.Invert: op.inv, +} + + +def eval_expr(expr: str) -> Any: + return eval_(ast.parse(expr, mode="eval").body) + + +def eval_(node: ast.AST) -> Any: + if ( + hasattr(ast, "Constant") + and isinstance(node, ast.Constant) + and isinstance(node.value, int) + ): # Python 3.8+ + return node.value + elif isinstance(node, ast.BinOp): + return operators[type(node.op)](eval_(node.left), eval_(node.right)) + elif isinstance(node, ast.UnaryOp): + return operators[type(node.op)](eval_(node.operand)) + elif sys.version_info < (3, 8) and isinstance(node, ast.Num): + return node.n + else: + raise TypeError(node) + + +def maybe_eval_int(expr: str) -> Optional[int]: + try: + ret = eval_expr(expr) + if not isinstance(ret, int): + raise Exception("not an integer") + return ret + except Exception: + return None + + +def eval_int(expr: str, emsg: str) -> int: + ret = maybe_eval_int(expr) + if ret is None: + fail(emsg) + return ret + + +def eval_line_num(expr: str) -> Optional[int]: + expr = expr.strip().replace(":", "") + if expr == "": + return None + return int(expr, 16) + + +def run_make(target: str, project: ProjectSettings) -> None: + subprocess.check_call(project.build_command + [target]) + + +def run_make_capture_output( + target: str, project: ProjectSettings +) -> "subprocess.CompletedProcess[bytes]": + return subprocess.run( + project.build_command + [target], + stderr=subprocess.PIPE, + stdout=subprocess.PIPE, + ) + + +def restrict_to_function(dump: str, fn_name: str) -> str: + try: + # Find the start of the line that contains ":" + ind = dump.rfind("\n", 0, dump.index(f"<{fn_name}>:")) + 1 + return dump[ind:] + except ValueError: + return "" + + +def serialize_rodata_references(references: List[Tuple[int, int, str]]) -> str: + return "".join( + f"DATAREF {text_offset} {from_offset} {from_section}\n" + for (text_offset, from_offset, from_section) in references + ) + + +def maybe_get_objdump_source_flags(config: Config) -> List[str]: + flags = [] + + if config.show_line_numbers or config.show_source: + flags.append("--line-numbers") + + if config.show_source: + flags.append("--source") + + if not config.source_old_binutils: + flags.append("--source-comment=│ ") + + if config.inlines: + flags.append("--inlines") + + return flags + + +def run_objdump(cmd: ObjdumpCommand, config: Config, project: ProjectSettings) -> str: + flags, target, restrict = cmd + try: + out = subprocess.run( + [project.objdump_executable] + + config.arch.arch_flags + + project.objdump_flags + + flags + + [target], + check=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + universal_newlines=True, + ).stdout + except subprocess.CalledProcessError as e: + print(e.stdout) + print(e.stderr) + if "unrecognized option '--source-comment" in e.stderr: + fail("** Try using --source-old-binutils instead of --source **") + raise e + + obj_data: Optional[bytes] = None + if config.diff_obj: + with open(target, "rb") as f: + obj_data = f.read() + + return preprocess_objdump_out(restrict, obj_data, 
out, config) + + +def preprocess_objdump_out( + restrict: Optional[str], obj_data: Optional[bytes], objdump_out: str, config: Config +) -> str: + """ + Preprocess the output of objdump into a format that `process()` expects. + This format is suitable for saving to disk with `--write-asm`. + + - Optionally filter the output to a single function (`restrict`) + - Otherwise, strip objdump header (6 lines) + - Prepend .data references ("DATAREF" lines) when working with object files + """ + out = objdump_out + + if restrict is not None: + out = restrict_to_function(out, restrict) + else: + for i in range(6): + out = out[out.find("\n") + 1 :] + out = out.rstrip("\n") + + if obj_data and config.show_rodata_refs: + out = ( + serialize_rodata_references(parse_elf_rodata_references(obj_data, config)) + + out + ) + + processor = config.arch.proc(config) + return processor.preprocess_objdump(out) + + +def search_build_objects(objname: str, project: ProjectSettings) -> Optional[str]: + objfiles = [ + os.path.join(dirpath, f) + for dirpath, _, filenames in os.walk(project.build_dir) + for f in filenames + if f == objname + ] + if len(objfiles) > 1: + all_objects = "\n".join(objfiles) + fail( + f"Found multiple objects of the same name {objname} in {project.build_dir}, " + f"cannot determine which to diff against: \n{all_objects}" + ) + if len(objfiles) == 1: + return objfiles[0] + + return None + + +def search_map_file( + fn_name: str, project: ProjectSettings, config: Config, *, for_binary: bool +) -> Tuple[Optional[str], Optional[int]]: + if not project.mapfile: + fail(f"No map file configured; cannot find function {fn_name}.") + + try: + with open( + project.mapfile, + encoding=MAPFILE_ENCODING, + errors=MAPFILE_ENCODING_ERROR_HANDLER, + ) as f: + contents = f.read() + except Exception: + fail(f"Failed to open map file {project.mapfile} for reading.") + + if project.map_format == "gnu": + if for_binary and "load address" not in contents: + fail( + 'Failed to find "load address" in map file. Maybe you need to add\n' + '"export LANG := C" to your Makefile to avoid localized output?' + ) + + lines = contents.split("\n") + + try: + cur_objfile = None + ram_to_rom = None + cands = [] + last_line = "" + for line in lines: + if line.startswith(" " + config.diff_section): + cur_objfile = line.split()[3] + if "load address" in line: + tokens = last_line.split() + line.split() + ram = int(tokens[1], 0) + rom = int(tokens[5], 0) + ram_to_rom = rom - ram + if line.endswith(" " + fn_name) or f" {fn_name} = 0x" in line: + ram = int(line.split()[0], 0) + if (for_binary and ram_to_rom is not None) or ( + not for_binary and cur_objfile is not None + ): + cands.append((cur_objfile, ram + (ram_to_rom or 0))) + last_line = line + except Exception as e: + traceback.print_exc() + fail(f"Internal error while parsing map file") + + if len(cands) > 1: + fail(f"Found multiple occurrences of function {fn_name} in map file.") + if len(cands) == 1: + return cands[0] + elif project.map_format == "mw": + find = re.findall( + # start address, size, virtual address, file offset, alignment + r" [0-9a-f]{8} [0-9a-f]{6} ([0-9a-f]{8})(?: ([0-9a-f]{8}))?(?: +\S{1,2})? +" + + re.escape(fn_name) + + r"(?: \(entry of " + + re.escape(config.diff_section) + + r"\))? 
\t" + # object name + + r"(\S+)", + contents, + ) + if len(find) > 1: + fail(f"Found multiple occurrences of function {fn_name} in map file.") + if len(find) == 1: + if find[0][1]: + rom = int(find[0][1], 16) + else: + rom = None + objname = find[0][2] + objfile = search_build_objects(objname, project) + + # TODO Currently the ram-rom conversion only works for diffing ELF + # executables, but it would likely be more convenient to diff DOLs. + # At this time it is recommended to always use -o when running the diff + # script as this mode does not make use of the ram-rom conversion. + if objfile is not None: + return objfile, rom + elif project.map_format == "ms": + load_address_find = re.search( + r"Preferred load address is ([0-9a-f]+)", + contents, + ) + if not load_address_find: + fail(f"Couldn't find module load address in map file.") + load_address = int(load_address_find.group(1), 16) + + diff_segment_find = re.search( + r"([0-9a-f]+):[0-9a-f]+ [0-9a-f]+H " + re.escape(config.diff_section), + contents, + ) + if not diff_segment_find: + fail(f"Couldn't find segment for section in map file.") + diff_segment = diff_segment_find.group(1) + + find = re.findall( + r" (?:" + + re.escape(diff_segment) + + r")\S+\s+(?:" + + re.escape(fn_name) + + r")\s+\S+ ... \S+", + contents, + ) + if len(find) > 1: + fail(f"Found multiple occurrences of function {fn_name} in map file.") + if len(find) == 1: + names_find = re.search(r"(\S+) ... (\S+)", find[0]) + assert names_find is not None + fileofs = int(names_find.group(1), 16) - load_address + if for_binary: + return None, fileofs + + objname = names_find.group(2) + objfile = search_build_objects(objname, project) + if objfile is not None: + return objfile, fileofs + else: + fail(f"Linker map format {project.map_format} unrecognised.") + return None, None + + +def parse_elf_rodata_references( + data: bytes, config: Config +) -> List[Tuple[int, int, str]]: + e_ident = data[:16] + if e_ident[:4] != b"\x7FELF": + return [] + + SHT_SYMTAB = 2 + SHT_REL = 9 + SHT_RELA = 4 + R_MIPS_32 = 2 + R_MIPS_GPREL32 = 12 + + is_32bit = e_ident[4] == 1 + is_little_endian = e_ident[5] == 1 + str_end = "<" if is_little_endian else ">" + str_off = "I" if is_32bit else "Q" + + def read(spec: str, offset: int) -> Tuple[int, ...]: + spec = spec.replace("P", str_off) + size = struct.calcsize(spec) + return struct.unpack(str_end + spec, data[offset : offset + size]) + + ( + e_type, + e_machine, + e_version, + e_entry, + e_phoff, + e_shoff, + e_flags, + e_ehsize, + e_phentsize, + e_phnum, + e_shentsize, + e_shnum, + e_shstrndx, + ) = read("HHIPPPIHHHHHH", 16) + if e_type != 1: # relocatable + return [] + assert e_shoff != 0 + assert e_shnum != 0 # don't support > 0xFF00 sections + assert e_shstrndx != 0 + + @dataclass + class Section: + sh_name: int + sh_type: int + sh_flags: int + sh_addr: int + sh_offset: int + sh_size: int + sh_link: int + sh_info: int + sh_addralign: int + sh_entsize: int + + sections = [ + Section(*read("IIPPPPIIPP", e_shoff + i * e_shentsize)) for i in range(e_shnum) + ] + shstr = sections[e_shstrndx] + sec_name_offs = [shstr.sh_offset + s.sh_name for s in sections] + sec_names = [data[offset : data.index(b"\0", offset)] for offset in sec_name_offs] + + symtab_sections = [i for i in range(e_shnum) if sections[i].sh_type == SHT_SYMTAB] + assert len(symtab_sections) == 1 + symtab = sections[symtab_sections[0]] + + section_name = config.diff_section.encode("utf-8") + text_sections = [ + i + for i in range(e_shnum) + if sec_names[i] == section_name and 
sections[i].sh_size != 0 + ] + if len(text_sections) != 1: + return [] + text_section = text_sections[0] + + ret: List[Tuple[int, int, str]] = [] + for s in sections: + if s.sh_type == SHT_REL or s.sh_type == SHT_RELA: + if s.sh_info == text_section: + # Skip section_name -> section_name references + continue + sec_name = sec_names[s.sh_info].decode("latin1") + if sec_name not in (".rodata", ".late_rodata"): + continue + sec_base = sections[s.sh_info].sh_offset + for i in range(0, s.sh_size, s.sh_entsize): + if s.sh_type == SHT_REL: + r_offset, r_info = read("PP", s.sh_offset + i) + else: + r_offset, r_info, r_addend = read("PPP", s.sh_offset + i) + + if is_32bit: + r_sym = r_info >> 8 + r_type = r_info & 0xFF + sym_offset = symtab.sh_offset + symtab.sh_entsize * r_sym + st_name, st_value, st_size, st_info, st_other, st_shndx = read( + "IIIBBH", sym_offset + ) + else: + r_sym = r_info >> 32 + r_type = r_info & 0xFFFFFFFF + sym_offset = symtab.sh_offset + symtab.sh_entsize * r_sym + st_name, st_info, st_other, st_shndx, st_value, st_size = read( + "IBBHQQ", sym_offset + ) + if st_shndx == text_section: + if s.sh_type == SHT_REL: + if e_machine == 8 and r_type in (R_MIPS_32, R_MIPS_GPREL32): + (r_addend,) = read("I", sec_base + r_offset) + else: + continue + text_offset = (st_value + r_addend) & 0xFFFFFFFF + ret.append((text_offset, r_offset, sec_name)) + return ret + + +def dump_elf( + start: str, + end: Optional[str], + diff_elf_symbol: str, + config: Config, + project: ProjectSettings, +) -> Tuple[str, ObjdumpCommand, ObjdumpCommand]: + if not project.baseimg or not project.myimg: + fail("Missing myimg/baseimg in config.") + if config.base_shift: + fail("--base-shift not compatible with -e") + + start_addr = eval_int(start, "Start address must be an integer expression.") + + if end is not None: + end_addr = eval_int(end, "End address must be an integer expression.") + else: + end_addr = start_addr + config.max_function_size_bytes + + flags1 = [ + f"--start-address={start_addr}", + f"--stop-address={end_addr}", + ] + + if project.disassemble_all: + disassemble_flag = "-D" + else: + disassemble_flag = "-d" + + if "llvm-" in project.objdump_executable: + flags2 = ["--disassemble", f"--disassemble-symbols={diff_elf_symbol}"] + else: + flags2 = [ + f"--disassemble={diff_elf_symbol}", + ] + + objdump_flags = [disassemble_flag, "-rz", "-j", config.diff_section] + return ( + project.myimg, + (objdump_flags + flags1, project.baseimg, None), + ( + objdump_flags + flags2 + maybe_get_objdump_source_flags(config), + project.myimg, + None, + ), + ) + + +def dump_objfile( + start: str, end: Optional[str], config: Config, project: ProjectSettings +) -> Tuple[str, ObjdumpCommand, ObjdumpCommand]: + if config.base_shift: + fail("--base-shift not compatible with -o") + if end is not None: + fail("end address not supported together with -o") + if start.startswith("0"): + fail("numerical start address not supported with -o; pass a function name") + + objfile = config.file + if not objfile: + objfile, _ = search_map_file(start, project, config, for_binary=False) + + if not objfile: + fail("Not able to find .o file for function.") + + if config.make: + run_make(objfile, project) + + if not os.path.isfile(objfile): + fail(f"Not able to find .o file for function: {objfile} is not a file.") + + refobjfile = os.path.join(project.expected_dir, objfile) + if config.diff_mode != DiffMode.SINGLE and not os.path.isfile(refobjfile): + fail(f'Please ensure an OK .o file exists at "{refobjfile}".') + + if 
project.disassemble_all: + disassemble_flag = "-D" + else: + disassemble_flag = "-d" + + objdump_flags = [disassemble_flag, "-rz", "-j", config.diff_section] + return ( + objfile, + (objdump_flags, refobjfile, start), + (objdump_flags + maybe_get_objdump_source_flags(config), objfile, start), + ) + + +def dump_binary( + start: str, end: Optional[str], config: Config, project: ProjectSettings +) -> Tuple[str, ObjdumpCommand, ObjdumpCommand]: + binfile = config.file or project.myimg + if not project.baseimg or not binfile: + fail("Missing myimg/baseimg in config.") + if config.make: + run_make(binfile, project) + if not os.path.isfile(binfile): + fail(f"Not able to find binary file: {binfile}") + start_addr = maybe_eval_int(start) + if start_addr is None and config.file is None: + _, start_addr = search_map_file(start, project, config, for_binary=True) + if start_addr is None: + fail("Not able to find function in map file.") + start_addr += project.map_address_offset + elif start_addr is None: + fail("Start address must be an integer expression when using binary -f") + if end is not None: + end_addr = eval_int(end, "End address must be an integer expression.") + else: + end_addr = start_addr + config.max_function_size_bytes + objdump_flags = ["-Dz", "-bbinary"] + ["-EB" if config.arch.big_endian else "-EL"] + flags1 = [ + f"--start-address={start_addr + config.base_shift}", + f"--stop-address={end_addr + config.base_shift}", + ] + flags2 = [f"--start-address={start_addr}", f"--stop-address={end_addr}"] + return ( + binfile, + (objdump_flags + flags1, project.baseimg, None), + (objdump_flags + flags2, binfile, None), + ) + + +# The base class is a no-op. +class AsmProcessor: + def __init__(self, config: Config) -> None: + self.config = config + + # Called during run_objdump() for arch-specific normalization. Runs before + # diff-processing, i.e. process(). + def preprocess_objdump(self, objdump: str) -> str: + return objdump + + def pre_process( + self, mnemonic: str, args: str, next_row: Optional[str], comment: Optional[str] + ) -> Tuple[str, str]: + return mnemonic, args + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + return prev, None + + def normalize(self, mnemonic: str, row: str) -> str: + """This should be called exactly once for each line.""" + arch = self.config.arch + row = self._normalize_arch_specific(mnemonic, row) + if self.config.ignore_large_imms and mnemonic not in arch.branch_instructions: + row = re.sub(self.config.arch.re_large_imm, "", row) + return row + + def _normalize_arch_specific(self, mnemonic: str, row: str) -> str: + return row + + def post_process(self, lines: List["Line"]) -> None: + return + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return False + + +class AsmProcessorMIPS(AsmProcessor): + def __init__(self, config: Config) -> None: + super().__init__(config) + self.seen_jr_ra = False + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + arch = self.config.arch + if "R_MIPS_NONE" in row or "R_MIPS_JALR" in row: + # GNU as emits no-op relocations immediately after real ones when + # assembling with -mabi=64. Return without trying to parse 'imm' as an + # integer. 
+ return prev, None + before, imm, after = parse_relocated_line(prev) + addend = reloc_addend_from_imm(imm, before, self.config.arch) + repl = row.split()[-1] + addend + if "R_MIPS_LO16" in row: + repl = f"%lo({repl})" + elif "R_MIPS_HI16" in row: + # Ideally we'd pair up R_MIPS_LO16 and R_MIPS_HI16 to generate a + # correct addend for each, but objdump doesn't give us the order of + # the relocations, so we can't find the right LO16. :( + repl = f"%hi({repl})" + elif "R_MIPS_26" in row: + # Function calls + pass + elif "R_MIPS_PC16" in row: + # Branch to glabel. This gives confusing output, but there's not much + # we can do here. + pass + elif "R_MIPS_GPREL16" in row: + repl = f"%gp_rel({repl})" + elif "R_MIPS_GOT16" in row: + repl = f"%got({repl})" + elif "R_MIPS_CALL16" in row: + repl = f"%call16({repl})" + elif "R_MIPS_LITERAL" in row: + # MWCC emits R_MIPS_LITERAL for float literals which don't have the same structure as the + # relocations emitted by GCC + # Only remove the addend if it is found in the relocation + if addend != "": + # .lit4+0x4000-0x4000 -> .lit4+0x4000 + repl = repl[: -len(addend)] + else: + assert False, f"unknown relocation type '{row}' for line '{prev}'" + return before + repl + after, repl + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + if self.seen_jr_ra: + return True + if mnemonic == "jr" and args == "ra": + self.seen_jr_ra = True + return False + + +class AsmProcessorPPC(AsmProcessor): + def pre_process( + self, mnemonic: str, args: str, next_row: Optional[str], comment: Optional[str] + ) -> Tuple[str, str]: + if next_row and "R_PPC_EMB_SDA21" in next_row: + # With sda21 relocs, the linker transforms `r0` into `r2`/`r13`, and + # we may encounter this in either pre-transformed or post-transformed + # versions depending on if the .o file comes from compiler output or + # from disassembly. Normalize, to make sure both forms are treated as + # equivalent. 
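+            # (Illustrative: an operand base of `(r13)` or `(r2)` in linked
+            # output is rewritten below to `(0)`, the form produced for the
+            # compiler's untransformed `r0` operand.)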
+ + args = args.replace("(r2)", "(0)") + args = args.replace("(r13)", "(0)") + args = args.replace(",r2,", ",0,") + args = args.replace(",r13,", ",0,") + + # We want to convert li and lis with an sda21 reloc, + # because the r0 to r2/r13 transformation results in + # turning an li/lis into an addi/addis with r2/r13 arg + # our preprocessing normalizes all versions to addi with a 0 arg + if mnemonic in {"li", "lis"}: + mnemonic = mnemonic.replace("li", "addi") + args_parts = args.split(",") + args = args_parts[0] + ",0," + args_parts[1] + if ( + next_row + and ("R_PPC_REL24" in next_row or "R_PPC_REL14" in next_row) + and ".text+0x" in next_row + and mnemonic in PPC_BRANCH_INSTRUCTIONS + ): + # GCC emits a relocation of "R_PPC_REL14" or "R_PPC_REL24" with a .text offset + # fixup the args to use the offset from the relocation + + # Split args by ',' which will result in either [cr, offset] or [offset] + # Replace the current offset with the next line's ".text+0x" offset + splitArgs = args.split(",") + splitArgs[-1] = next_row.split(".text+0x")[-1] + args = ",".join(splitArgs) + + if ( + comment is not None + and ( + next_row is None + or re.search(self.config.arch.re_reloc, next_row) is None + ) + and mnemonic == "bl" + ): + # if the mnemonic is bl and the comment doesn't match + # <.text+0x...> replace the args with the contents of the comment + if re.search(r"<.+\+0x[0-9a-fA-F]+>", comment) is None: + args = comment[1:-1] + + return mnemonic, args + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + # row is the line with the relocations + # prev is the line to apply relocations to + + arch = self.config.arch + assert any( + r in row + for r in ["R_PPC_REL24", "R_PPC_ADDR16", "R_PPC_EMB_SDA21", "R_PPC_REL14"] + ), f"unknown relocation type '{row}' for line '{prev}'" + before, imm, after = parse_relocated_line(prev) + repl = row.split()[-1] + mnemonic, args = prev.split(maxsplit=1) + + if "R_PPC_REL24" in row: + # function calls + # or unconditional branches generated by GCC "b offset" + if mnemonic in PPC_BRANCH_INSTRUCTIONS and ".text+0x" in row: + # this has been handled in pre_process + return prev, None + elif "R_PPC_REL14" in row: + if mnemonic in PPC_BRANCH_INSTRUCTIONS and ".text+0x" in row: + # this has been handled in pre_process + return prev, None + elif "R_PPC_ADDR16_HI" in row: + # absolute hi of addr + repl = f"{repl}@h" + elif "R_PPC_ADDR16_HA" in row: + # adjusted hi of addr + repl = f"{repl}@ha" + elif "R_PPC_ADDR16_LO" in row: + # lo of addr + repl = f"{repl}@l" + elif "R_PPC_ADDR16" in row: + # 16-bit absolute addr + if "+0x7" in repl: + # remove the very large addends as they are an artifact of (label-_SDA(2)_BASE_) + # computations and are unimportant in a diff setting. 
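+                # e.g. a hypothetical reloc target printed as `sym+0x7fff8000`
+                # is reduced to just `sym` by the check below.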
+ if int(repl.split("+")[1], 16) > 0x70000000: + repl = repl.split("+")[0] + elif "R_PPC_EMB_SDA21" in row: + # sda21 relocations; r2/r13 --> 0 swaps are performed in pre_process + repl = f"{repl}@sda21" + + return before + repl + after, repl + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return mnemonic == "blr" + + +# Example: "cmp r0, #0x10" +ARM32_COMPARE_IMM_PATTERN = r"cmp\s+(r[0-9]|1[0-3]),\s+#(\w+)" + +# Example: "add pc, r1" +ARM32_JUMP_TABLE_START = r"add\s+pc,\s*r" + +# Examples: +# - "44: 00060032 .word 0x00060032" +# - "48: 0032 .short 0x0032" +# - "4a: 0032 movs r2, r6" +# - "9e: 00be lsls r6, r7, #2" +# - ".short 0x0032 ; 0x64" +ARM32_JUMP_TABLE_ENTRY_PATTERN = r"(?:(\w+):\s+([0-9a-f]+)\s+)?([\w\.]+)\s+([\w,\ ]+)" + +# Example: "ldr r4, [pc, #56] ; (4c )" +ARM32_LOAD_POOL_PATTERN = r"(ldr\s+r([0-9]|1[0-3]),\s+\[pc,.*;\s*)(\([a-fA-F0-9]+.*\))" + + +class AsmProcessorARM32(AsmProcessor): + @dataclass + class JumpTableEntry: + cur_addr: int + table_start_addr: int + value: int + is_word: bool + + def preprocess_objdump(self, objdump: str) -> str: + def short_table_entry( + cur_addr: int, jump_table_start_addr: int, value: int + ) -> str: + branch_target = jump_table_start_addr + value + 4 + return f" {cur_addr:x}: {value:04x} .short 0x{value:04x} ; 0x{branch_target:x}" + + new_lines = [] + lines = objdump.splitlines() + for i, jump_table_entry in self._lines_iterator(lines): + if jump_table_entry is None: + new_lines.append(lines[i]) + continue + + entry = jump_table_entry + if entry.is_word: + # Split into two ".short" entries. + hi, lo = entry.value >> 16, entry.value & 0xFFFF + new_lines.append( + short_table_entry(entry.cur_addr, entry.table_start_addr, lo) + ) + new_lines.append( + short_table_entry(entry.cur_addr + 2, entry.table_start_addr, hi) + ) + else: + new_lines.append( + short_table_entry( + entry.cur_addr, entry.table_start_addr, entry.value + ) + ) + return "\n".join(new_lines) + + # An iterator for each line of assembly, returning the line index and optional + # metadata if the line is a jump table entry. + def _lines_iterator( + self, lines: List[str] + ) -> Iterator[Tuple[int, Optional[JumpTableEntry]]]: + jump_table_entries = 0 + table_start_addr = 0 + for i, line in enumerate(lines): + addr_match = re.match(r"^\s*([0-9a-f]+):", line) + addr = int(addr_match.group(1), 16) if addr_match else -1 + entry_match = re.search(ARM32_JUMP_TABLE_ENTRY_PATTERN, line) + if jump_table_entries > 0 and entry_match: + value = ( + entry_match.group(4) + if is_hexstring(entry_match.group(4)) + else entry_match.group(2) + ) + table_entry = self.JumpTableEntry( + cur_addr=addr, + table_start_addr=table_start_addr, + value=int(value, 16), + is_word=entry_match.group(3) == ".word", + ) + jump_table_entries -= 2 if table_entry.is_word else 1 + + yield i, table_entry + continue + + # Check for jump tables. + if re.search(ARM32_JUMP_TABLE_START, line): + jump_table_entries = self._jump_table_entries_count(lines, i) + table_start_addr = addr + yield i, None + + # Returns the number of entries in the jump table starting at `line_no`, or + # 0 if it's not a jump table. + def _jump_table_entries_count(self, raw_lines: List[str], line_no: int) -> int: + # The number of entries should be in the most recent `cmp` before the + # jump table. 
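+        # e.g. a guard of `cmp r2, #5` bounds the table index to 0..5, so the
+        # table is assumed to hold 5 + 1 = 6 entries (hence `value + 1` below).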
+ for i in reversed(range(line_no)): + cmp_match = re.search(ARM32_COMPARE_IMM_PATTERN, raw_lines[i]) + if cmp_match: + value = immediate_to_int(cmp_match.group(2)) + if value > 0: + return value + 1 + return 0 + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + arch = self.config.arch + if "R_ARM_V4BX" in row: + # R_ARM_V4BX converts "bx " to "mov pc," for some targets. + # Ignore for now. + return prev, None + if "R_ARM_ABS32" in row and not prev.startswith(".word"): + # Don't crash on R_ARM_ABS32 relocations incorrectly applied to code. + # (We may want to do something more fancy here that actually shows the + # related symbol, but this serves as a stop-gap.) + return prev, None + before, imm, after = parse_relocated_line(prev) + repl = row.split()[-1] + reloc_addend_from_imm(imm, before, self.config.arch) + return before + repl + after, repl + + def _normalize_arch_specific(self, mnemonic: str, row: str) -> str: + if self.config.ignore_addr_diffs: + row = self._normalize_bl(mnemonic, row) + row = self._normalize_data_pool(row) + return row + + def _normalize_bl(self, mnemonic: str, row: str) -> str: + if mnemonic != "bl": + return row + + row, _ = split_off_address(row) + return row + "" + + def _normalize_data_pool(self, row: str) -> str: + pool_match = re.search(ARM32_LOAD_POOL_PATTERN, row) + return pool_match.group(1) if pool_match else row + + def _post_process_jump_tables(self, lines: List["Line"]) -> None: + raw_lines = [ + ( + f"{line.line_num:x}: {line.original}" + if line.line_num is not None + else line.original + ) + for line in lines + ] + for i, jump_table_entry in self._lines_iterator(raw_lines): + if jump_table_entry is None: + continue + + entry = jump_table_entry + lines[i].branch_target = entry.table_start_addr + entry.value + 4 + + def _post_process_data_pools(self, lines: List["Line"]) -> None: + lines_by_line_number = {} + for line in lines: + if line.line_num is not None: + lines_by_line_number[line.line_num] = line + for line in lines: + if line.data_pool_addr is None: + continue + + # Add data symbol and its address to the line. + line_original = lines_by_line_number[line.data_pool_addr].original + value = line_original.split()[1] + addr = "{:x}".format(line.data_pool_addr) + line.original = line.normalized_original + f"={value} ({addr})" + + def post_process(self, lines: List["Line"]) -> None: + self._post_process_jump_tables(lines) + self._post_process_data_pools(lines) + + +class AsmProcessorAArch64(AsmProcessor): + def __init__(self, config: Config) -> None: + super().__init__(config) + self._adrp_pair_registers: Set[str] = set() + + def _normalize_arch_specific(self, mnemonic: str, row: str) -> str: + if self.config.ignore_addr_diffs: + row = self._normalize_adrp_differences(mnemonic, row) + row = self._normalize_bl(mnemonic, row) + return row + + def _normalize_bl(self, mnemonic: str, row: str) -> str: + if mnemonic != "bl": + return row + + row, _ = split_off_address(row) + return row + "" + + def _normalize_adrp_differences(self, mnemonic: str, row: str) -> str: + """Identifies ADRP + LDR/ADD pairs that are used to access the GOT and + suppresses any immediate differences. + + Whenever an ADRP is seen, the destination register is added to the set of registers + that are part of an ADRP + LDR/ADD pair. Registers are removed from the set as soon + as they are used for an LDR or ADD instruction which completes the pair. + + This method is somewhat crude but should manage to detect most such pairs. 
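+        For example (hypothetical pair), `adrp x8, <page>` followed by
+        `ldr x8, [x8, #offset]` keeps matching even when the concrete page and
+        offset values differ between the two builds.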
+ """ + row_parts = row.split("\t", 1) + if mnemonic == "adrp": + self._adrp_pair_registers.add(row_parts[1].strip().split(",")[0]) + row, _ = split_off_address(row) + return row + "" + elif mnemonic == "ldr": + for reg in self._adrp_pair_registers: + # ldr xxx, [reg] + # ldr xxx, [reg, ] + if f", [{reg}" in row_parts[1]: + self._adrp_pair_registers.remove(reg) + return normalize_imms(row, AARCH64_SETTINGS) + elif mnemonic == "add": + for reg in self._adrp_pair_registers: + # add reg, reg, + if row_parts[1].startswith(f"{reg}, {reg}, "): + self._adrp_pair_registers.remove(reg) + return normalize_imms(row, AARCH64_SETTINGS) + + return row + + +class AsmProcessorI686(AsmProcessor): + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + if "WRTSEG" in row: # ignore WRTSEG (watcom) + return prev, None + repl = row.split()[-1] + mnemonic, args = prev.split(maxsplit=1) + offset = False + + # Calls + # Example call a2f + # Example call *0 + # Example jmp 64 + if mnemonic in I686_BRANCH_INSTRUCTIONS: + addr_imm = re.search(r"(^|(?<=\*)|(?<=\*\%cs\:))[0-9a-f]+", args) + + # Direct use of reloc + # Example 0x0,0x8(%edi) + # Example 0x0,%edi + # Example *0x0(,%edx,4) + # Example %edi,0 + # Example movb $0x0,0x0 + # Example $0x0,0x4(%edi) + # Match 0x0 part to replace + else: + addr_imm = re.search(r"(?:0x)?0+$", args) + + if not addr_imm: + addr_imm = re.search(r"(^\$?|(?<=\*))0x0", args) + + # Offset value + # Example 0x4,%eax + # Example $0x4,%eax + if not addr_imm: + addr_imm = re.search(r"(^|(?<=\*)|(?<=\$))0x[0-9a-f]+", args) + offset = True + + if not addr_imm: + addr_imm = re.search(r"(^|(?<=\*)|(?<=\%[fgdecs]s\:))0x[0-9a-f]+", args) + offset = True + + if not addr_imm: + assert False, f"failed to find address immediate for line '{prev}'" + + start, end = addr_imm.span() + + if "R_386_NONE" in row: + pass + elif "R_386_32" in row: + pass + elif "R_386_PC32" in row: + pass + elif "R_386_16" in row: + pass + elif "R_386_PC16" in row: + pass + elif "R_386_8" in row: + pass + elif "R_386_PC8" in row: + pass + elif "dir32" in row: + if "+" in repl: + repl = repl.split("+")[0] + elif "DISP32" in row: + pass + elif "OFF32" in row: + pass + elif "OFFPC32" in row: + if "+" in repl: + repl = repl.split("+")[0] + elif "R_386_GOT32" in row: + repl = f"%got({repl})" + elif "R_386_PLT32" in row: + repl = f"%plt({repl})" + elif "R_386_RELATIVE" in row: + repl = f"%rel({repl})" + elif "R_386_GOTOFF" in row: + repl = f"%got({repl})" + elif "R_386_GOTPC" in row: + repl = f"%got({repl})" + elif "R_386_32PLT" in row: + repl = f"%plt({repl})" + else: + assert False, f"unknown relocation type '{row}' for line '{prev}'" + + if offset: + repl = f"{repl}+{addr_imm.group()}" + + return f"{mnemonic}\t{args[:start]+repl+args[end:]}", repl + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return mnemonic == "ret" + + +class AsmProcessorSH2(AsmProcessor): + def __init__(self, config: Config) -> None: + super().__init__(config) + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + return prev, None + + def is_end_of_function(self, mnemonic: str, args: str) -> bool: + return mnemonic == "rts" + + +class AsmProcessorM68k(AsmProcessor): + def pre_process( + self, mnemonic: str, args: str, next_row: Optional[str], comment: Optional[str] + ) -> Tuple[str, str]: + # replace objdump's syntax of pointer accesses with the equivilant in AT&T syntax for readability + return mnemonic, re.sub( + r"%(sp|a[0-7]|fp|pc)@(?:(?:\((-?(?:0x[0-9a-f]+|[0-9]+)) 
*(,%d[0-7]:[wl])?\))|(\+)|(-))?", + r"\5\2(%\1\3)\4", + args, + ) + + def process_reloc(self, row: str, prev: str) -> Tuple[str, Optional[str]]: + repl = row.split()[-1] + mnemonic, args = prev.split(maxsplit=1) + + addr_imm = re.search(r"(? bool: + return mnemonic == "rts" or mnemonic == "rte" or mnemonic == "rtr" + + +@dataclass +class ArchSettings: + name: str + re_int: Pattern[str] + re_comment: Pattern[str] + re_reg: Pattern[str] + re_sprel: Pattern[str] + re_large_imm: Pattern[str] + re_imm: Pattern[str] + re_reloc: Pattern[str] + branch_instructions: Set[str] + instructions_with_address_immediates: Set[str] + forbidden: Set[str] = field(default_factory=lambda: set(string.ascii_letters + "_")) + arch_flags: List[str] = field(default_factory=list) + branch_likely_instructions: Set[str] = field(default_factory=set) + proc: Type[AsmProcessor] = AsmProcessor + big_endian: Optional[bool] = True + delay_slot_instructions: Set[str] = field(default_factory=set) + + +MIPS_BRANCH_LIKELY_INSTRUCTIONS = { + "beql", + "bnel", + "beqzl", + "bnezl", + "bgezl", + "bgtzl", + "blezl", + "bltzl", + "bc1tl", + "bc1fl", +} +MIPS_BRANCH_INSTRUCTIONS = MIPS_BRANCH_LIKELY_INSTRUCTIONS.union( + { + "b", + "beq", + "bne", + "beqz", + "bnez", + "bgez", + "bgtz", + "blez", + "bltz", + "bc1t", + "bc1f", + } +) + +ARM32_PREFIXES = {"b", "bl"} +ARM32_CONDS = { + "", + "eq", + "ne", + "cs", + "cc", + "mi", + "pl", + "vs", + "vc", + "hi", + "ls", + "ge", + "lt", + "gt", + "le", + "al", +} +ARM32_SUFFIXES = {"", ".n", ".w"} +ARM32_BRANCH_INSTRUCTIONS = { + f"{prefix}{cond}{suffix}" + for prefix in ARM32_PREFIXES + for cond in ARM32_CONDS + for suffix in ARM32_SUFFIXES +} + +AARCH64_BRANCH_INSTRUCTIONS = { + "b", + "b.eq", + "b.ne", + "b.cs", + "b.hs", + "b.cc", + "b.lo", + "b.mi", + "b.pl", + "b.vs", + "b.vc", + "b.hi", + "b.ls", + "b.ge", + "b.lt", + "b.gt", + "b.le", + "cbz", + "cbnz", + "tbz", + "tbnz", +} + +PPC_BRANCH_INSTRUCTIONS = { + "b", + "beq", + "beq+", + "beq-", + "bne", + "bne+", + "bne-", + "blt", + "blt+", + "blt-", + "ble", + "ble+", + "ble-", + "bdz", + "bdz+", + "bdz-", + "bdnz", + "bdnz+", + "bdnz-", + "bge", + "bge+", + "bge-", + "bgt", + "bgt+", + "bgt-", + "bso", + "bso+", + "bso-", + "bns", + "bns+", + "bns-", +} + +I686_BRANCH_INSTRUCTIONS = { + "call", + "jmp", + "ljmp", + "ja", + "jae", + "jb", + "jbe", + "jc", + "jcxz", + "jecxz", + "jrcxz", + "je", + "jg", + "jge", + "jl", + "jle", + "jna", + "jnae", + "jnb", + "jnbe", + "jnc", + "jne", + "jng", + "jnge", + "jnl", + "jnle", + "jno", + "jnp", + "jns", + "jnz", + "jo", + "jp", + "jpe", + "jpo", + "js", + "jz", + "ja", + "jae", + "jb", + "jbe", + "jc", + "je", + "jz", + "jg", + "jge", + "jl", + "jle", + "jna", + "jnae", + "jnb", + "jnbe", + "jnc", + "jne", + "jng", + "jnge", + "jnl", + "jnle", + "jno", + "jnp", + "jns", + "jnz", + "jo", + "jp", + "jpe", + "jpo", + "js", + "jz", +} + +SH2_BRANCH_INSTRUCTIONS = { + "bf", + "bf.s", + "bt", + "bt.s", + "bra", + "bsr", +} + +M68K_CONDS = { + "ra", + "cc", + "cs", + "eq", + "ge", + "gt", + "hi", + "le", + "ls", + "lt", + "mi", + "ne", + "pl", + "vc", + "vs", +} + +M68K_BRANCH_INSTRUCTIONS = { + f"{prefix}{cond}{suffix}" + for prefix in {"b", "db"} + for cond in M68K_CONDS + for suffix in {"s", "w"} +}.union( + { + "dbt", + "dbf", + "bsrw", + "bsrs", + } +) + + +MIPS_SETTINGS = ArchSettings( + name="mips", + re_int=re.compile(r"[0-9]+"), + re_comment=re.compile(r"<.*>"), + # Includes: + # - General purpose registers v0..1, a0..7, t0..9, s0..8, zero, at, fp, k0..1/kt0..1 + # - Float registers 
f0..31, or fv0..1, fa0..7, ft0..15, fs0..8 plus odd complements + # (actually used number depends on ABI) + # sp, gp should not be in this list + re_reg=re.compile(r"\$?\b([astv][0-9]|at|f[astv]?[0-9]+f?|kt?[01]|fp|ra|zero)\b"), + re_sprel=re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)"), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile( + r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(sp)|%(lo|hi|got|gp_rel|call16)\([^)]*\)" + ), + re_reloc=re.compile(r"R_MIPS_"), + arch_flags=["-m", "mips:4300"], + branch_likely_instructions=MIPS_BRANCH_LIKELY_INSTRUCTIONS, + branch_instructions=MIPS_BRANCH_INSTRUCTIONS, + instructions_with_address_immediates=MIPS_BRANCH_INSTRUCTIONS.union( + {"j", "jal", "bal"} + ), + delay_slot_instructions=MIPS_BRANCH_INSTRUCTIONS.union( + {"j", "jal", "jr", "jalr", "bal"} + ), + proc=AsmProcessorMIPS, +) + +MIPSEL_SETTINGS = replace( + MIPS_SETTINGS, name="mipsel", big_endian=False, arch_flags=["-m", "mips:3000"] +) + +MIPSEE_SETTINGS = replace( + MIPSEL_SETTINGS, name="mipsee", arch_flags=["-m", "mips:5900"] +) + +MIPSEL_4000_SETTINGS = replace( + MIPSEL_SETTINGS, name="mipsel:4000", arch_flags=["-m", "mips:gs464"] +) + +MIPS_ARCH_NAMES = {"mips", "mipsel", "mipsee", "mipsel:4000"} + +ARM32_SETTINGS = ArchSettings( + name="arm32", + re_int=re.compile(r"[0-9]+"), + re_comment=re.compile(r"(<.*>|//.*$)"), + # Includes: + # - General purpose registers: r0..13 + # - Frame pointer registers: lr (r14), pc (r15) + # - VFP/NEON registers: s0..31, d0..31, q0..15, fpscr, fpexc, fpsid + # SP should not be in this list. + re_reg=re.compile( + r"\$?\b([rq][0-9]|[rq]1[0-5]|pc|lr|[ds][12]?[0-9]|[ds]3[01]|fp(scr|exc|sid))\b" + ), + re_sprel=re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b"), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile(r"(?|//.*$)"), + # GPRs and FP registers: X0-X30, W0-W30, [BHSDVQ]0..31 + # (FP registers may be followed by data width and number of elements, e.g. V0.4S) + # The zero registers and SP should not be in this list. + re_reg=re.compile( + r"\$?\b([bhsdvq]([12]?[0-9]|3[01])(\.\d\d?[bhsdvq])?|[xw][12]?[0-9]|[xw]30)\b" + ), + re_sprel=re.compile(r"sp, #-?(0x[0-9a-fA-F]+|[0-9]+)\b"), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile(r"(?|//.*$)"), + # r1 not included + re_reg=re.compile(r"\$?\b([rf](?:[02-9]|[1-9][0-9]+)|f1)\b"), + re_sprel=re.compile(r"(?<=,)(-?[0-9]+|-?0x[0-9a-f]+)\(r1\)"), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile( + r"(\b|-)([0-9]+|0x[0-9a-fA-F]+)\b(?!\(r1\))|[^ \t,]+@(l|ha|h|sda21)" + ), + re_reloc=re.compile(r"R_PPC_"), + arch_flags=["-m", "powerpc", "-M", "broadway"], + branch_instructions=PPC_BRANCH_INSTRUCTIONS, + instructions_with_address_immediates=PPC_BRANCH_INSTRUCTIONS.union({"bl"}), + proc=AsmProcessorPPC, +) + +I686_SETTINGS = ArchSettings( + name="i686", + re_int=re.compile(r"[0-9]+"), + re_comment=re.compile(r"<.*>"), + # Includes: + # - (e)a-d(x,l,h) + # - (e)s,d,b(i,p)(l) + # - cr0-7 + # - x87 st + # - MMX, SSE vector registers + # - cursed registers: eal ebl ebh edl edh... 
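+ # (Note on the "cursed" entries above: because of the e? prefix and the
+ # [abcd][xhl] group, this pattern would also accept register names that do
+ # not exist, such as %eal or %ebh. That is presumably harmless, since
+ # objdump never emits them, and it keeps the expression compact. Ordinary
+ # operands like %eax, %esi, %bp, cr3 and %xmm5 all match as intended.)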
+ re_reg=re.compile( + r"\%?\b(e?(([sd]i|[sb]p)l?|[abcd][xhl])|[cdesfg]s|cr[0-7]|x?mm[0-7]|st)\b" + ), + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_sprel=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)(?=\((%ebp|%esi)\))"), + re_imm=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)|([\?$_][^ \t,]+)"), + re_reloc=re.compile(r"R_386_|dir32|DISP32|WRTSEG|OFF32|OFFPC32"), + # The x86 architecture has a variable instruction length. The raw bytes of + # an instruction as displayed by objdump can line wrap if it's long enough. + # This destroys the objdump output processor logic, so we avoid this. + arch_flags=["-m", "i386", "--no-show-raw-insn"], + branch_instructions=I686_BRANCH_INSTRUCTIONS, + instructions_with_address_immediates=I686_BRANCH_INSTRUCTIONS.union({"mov"}), + proc=AsmProcessorI686, +) + +SH2_SETTINGS = ArchSettings( + name="sh2", + # match -128-127 preceded by a '#' with a ',' after (8 bit immediates) + re_int=re.compile(r"(?<=#)(-?(?:1[01][0-9]|12[0-8]|[1-9][0-9]?|0))(?=,)"), + # match , match ! and after + re_comment=re.compile(r"<.*?>|!.*"), + # - r0-r15 general purpose registers, r15 is stack pointer during exceptions + # - sr, gbr, vbr - control registers + # - mach, macl, pr, pc - system registers + re_reg=re.compile(r"r1[0-5]|r[0-9]"), + # sh2 has pc-relative and gbr-relative but not stack-pointer-relative + re_sprel=re.compile(r"(?<=,)([0-9]+|0x[0-9a-f]+)\(sp\)"), + # max immediate size is 8-bit + re_large_imm=re.compile(r"-?[1-9][0-9]{2,}|-?0x[0-9a-f]{3,}"), + re_imm=re.compile(r"\b0[xX][0-9a-fA-F]+\b"), + # https://github.com/bminor/binutils-gdb/blob/master/bfd/elf32-sh-relocs.h#L21 + re_reloc=re.compile(r"R_SH_"), + arch_flags=["-m", "sh2"], + branch_instructions=SH2_BRANCH_INSTRUCTIONS, + instructions_with_address_immediates=SH2_BRANCH_INSTRUCTIONS.union( + {"bf", "bf.s", "bt", "bt.s", "bra", "bsr"} + ), + delay_slot_instructions=SH2_BRANCH_INSTRUCTIONS.union( + {"bf.s", "bt.s", "bra", "braf", "bsr", "bsrf", "jmp", "jsr", "rts"} + ), + proc=AsmProcessorSH2, +) + +SH4_SETTINGS = replace( + SH2_SETTINGS, + name="sh4", + # - fr0-fr15, dr0-dr14, xd0-xd14, fv0-fv12 FP registers + # dr/xd registers can only be even-numbered, and fv registers can only be a multiple of 4 + re_reg=re.compile( + r"r1[0-5]|r[0-9]|fr1[0-5]|fr[0-9]|dr[02468]|dr1[024]|xd[02468]|xd1[024]|fv[048]|fv12" + ), + arch_flags=["-m", "sh4"], +) + +SH4EL_SETTINGS = replace(SH4_SETTINGS, name="sh4el", big_endian=False) + +M68K_SETTINGS = ArchSettings( + name="m68k", + re_int=re.compile(r"[0-9]+"), + # '|' is used by assemblers, but is not used by objdump + re_comment=re.compile(r"<.*>"), + # Includes: + # - d0-d7 data registers + # - a0-a6 address registers + # - fp0-fp7 floating-point registers + # - usp (user sp) + # - fp, sr, ccr + # - fpcr, fpsr, fpiar + re_reg=re.compile(r"%\b(d[0-7]|a[0-6]|usp|fp([0-7]|cr|sr|iar)?|sr|ccr)(:[wl])?\b"), + # This matches all stack accesses that do not use an index register + re_sprel=re.compile(r"-?(0x[0-9a-f]+|[0-9]+)(?=\((%sp|%a7)\))"), + re_imm=re.compile(r"#?-?\b(0x[0-9a-f]+|[0-9]+)(?!\()"), + re_large_imm=re.compile(r"#?-?([1-9][0-9]{2,}|0x[0-9a-f]{3,})"), + re_reloc=re.compile(r"R_68K_"), + arch_flags=["-m", "m68k"], + branch_instructions=M68K_BRANCH_INSTRUCTIONS, + # Pretty much every instruction can take an address immediate + instructions_with_address_immediates=M68K_BRANCH_INSTRUCTIONS.union("jmp", "jsr"), + proc=AsmProcessorM68k, +) + +ARCH_SETTINGS = [ + MIPS_SETTINGS, + MIPSEL_SETTINGS, + MIPSEE_SETTINGS, + MIPSEL_4000_SETTINGS, + ARM32_SETTINGS, + 
ARMEL_SETTINGS, + AARCH64_SETTINGS, + PPC_SETTINGS, + I686_SETTINGS, + SH2_SETTINGS, + SH4_SETTINGS, + SH4EL_SETTINGS, + M68K_SETTINGS, +] + + +def immediate_to_int(immediate: str) -> int: + imm_match = re.match(r"#?(0x)?([0-9a-f]+)", immediate) + assert imm_match + base = 16 if imm_match.group(1) else 10 + return int(imm_match.group(2), base) + + +def is_hexstring(value: str) -> bool: + try: + int(value, 16) + return True + except ValueError: + return False + + +def hexify_int(row: str, pat: Match[str], arch: ArchSettings) -> str: + full = pat.group(0) + + # sh2/sh4 only has 8-bit immediates, just convert them uniformly without + # any -hex stuff + if arch.name == "sh2" or arch.name == "sh4" or arch.name == "sh4el": + return hex(int(full) & 0xFF) + + if len(full) <= 1: + # leave one-digit ints alone + return full + start, end = pat.span() + if start and row[start - 1] in arch.forbidden: + return full + if end < len(row) and row[end] in arch.forbidden: + return full + return hex(int(full)) + + +def parse_relocated_line(line: str) -> Tuple[str, str, str]: + # Pick out the last argument + for c in ",\t ": + if c in line: + ind2 = line.rindex(c) + break + else: + raise Exception(f"failed to parse relocated line: {line}") + before = line[: ind2 + 1] + after = line[ind2 + 1 :] + # Move an optional ($reg) part of it to 'after' + ind2 = after.find("(") + if ind2 == -1: + imm, after = after, "" + else: + imm, after = after[:ind2], after[ind2:] + return before, imm, after + + +def reloc_addend_from_imm(imm: str, before: str, arch: ArchSettings) -> str: + """For architectures like MIPS where relocations have addends embedded in + the code as immediates, convert such an immediate into an addition/ + subtraction that can occur just after the symbol.""" + # TODO this is incorrect for MIPS %lo/%hi which need to be paired up + # and combined. In practice, this means we only get symbol offsets within + # %lo, while %hi just shows the symbol. Unfortunately, objdump's output + # loses relocation order, so we cannot do this without parsing ELF relocs + # ourselves... 
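+ # Illustrative (hypothetical) inputs: for mnemonics in
+ # instructions_with_address_immediates the immediate is parsed as hex, so
+ # "10" yields "+0x10"; otherwise base auto-detection is used, so "-4"
+ # yields "-0x4" and "0x20" yields "+0x20". A zero addend returns "" so
+ # that only the bare relocation symbol is shown.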
+ mnemonic = before.split()[0]
+ if mnemonic in arch.instructions_with_address_immediates:
+ addend = int(imm, 16)
+ else:
+ addend = int(imm, 0)
+ if addend == 0:
+ return ""
+ elif addend < 0:
+ return hex(addend)
+ else:
+ return "+" + hex(addend)
+
+
+def pad_mnemonic(line: str) -> str:
+ if "\t" not in line:
+ return line
+ mn, args = line.split("\t", 1)
+ return f"{mn:<7s} {args}"
+
+
+@dataclass
+class Line:
+ mnemonic: str
+ diff_row: str
+ original: str
+ normalized_original: str
+ scorable_line: str
+ symbol: Optional[str] = None
+ line_num: Optional[int] = None
+ branch_target: Optional[int] = None
+ data_pool_addr: Optional[int] = None
+ source_filename: Optional[str] = None
+ source_line_num: Optional[int] = None
+ source_lines: List[str] = field(default_factory=list)
+ comment: Optional[str] = None
+
+
+def process(dump: str, config: Config) -> List[Line]:
+ arch = config.arch
+ processor = arch.proc(config)
+ source_lines = []
+ source_filename = None
+ source_line_num = None
+ rets_remaining = config.stop_at_ret
+
+ i = 0
+ num_instr = 0
+ data_refs: Dict[int, Dict[str, List[int]]] = defaultdict(lambda: defaultdict(list))
+ output: List[Line] = []
+ lines = dump.split("\n")
+ while i < len(lines):
+ row = lines[i]
+ i += 1
+
+ if not row:
+ continue
+
+ # Check if the current line has "OFFSET <function>:"
+ function_label_match = re.match(r"^[0-9a-f]+ <(.*)>:$", row)
+
+ if function_label_match:
+ function_name = function_label_match.groups()[0] + ":"
+
+ if config.diff_function_symbols:
+ # If diffing function symbols is enabled
+ # Add the symbol to the diff output
+
+ output.append(
+ Line(
+ mnemonic="