From 3d58db8765c95bc7ee0d80f974515bf91f01c633 Mon Sep 17 00:00:00 2001 From: "Matthias J. Kannwischer" Date: Mon, 13 Jan 2025 11:32:20 +0800 Subject: [PATCH] Armv7-M: Default to .w for better alignment Currently, we maintain the instruction-width modifiers as they are in the input code (with a few exceptions). However, this can negatively impact performance as SLOTHY may break code alignment. This commit changes the Armv7-M instruction writer to output .w for all instructions, resulting in the best performance (modulo the size of the instruction cache). Unfortunately, LLVM (in the selftest) stumbles over some of these .w modifiers in places where they do not have any effect. To work around that, we remove the modifiers for the selftest. --- slothy/helper.py | 20 ++++++++++++++++++-- slothy/targets/arm_v7m/arch_v7m.py | 12 ++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/slothy/helper.py b/slothy/helper.py index 3d386d09..a481e2fc 100644 --- a/slothy/helper.py +++ b/slothy/helper.py @@ -1212,12 +1212,28 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa include=include_paths) except subprocess.CalledProcessError as exc: log.error("CPreprocessor failed on the following input") - log.error(SouceLine.write_multiline(source)) + log.error(SourceLine.write_multiline(source)) raise LLVM_Mc_Error from exc if platform.system() == "Darwin": source = list(filter(lambda s: s.text.strip().startswith(".type") is False, source)) + + # Remove all width information - LLVM cannot handle .w for + # some instructions that only have a 32-bit encoding, + # e.g., uadd16.w works in gcc, but not LLVM. + # Unfortunately, for some instructions this depends + # on the registers used and, hence, adjusting the input to + # SLOTHY is not sufficient. + # As we currently don't have a model of the instruction encodings, + # there is no principled way to reason about it. 
+ if thumb: + for line in source: + instruction = line.text + instruction = instruction.replace(".w ", " ") + instruction = instruction.replace(".n ", " ") + line.set_text(instruction) + code = SourceLine.write_multiline(source) log.debug(f"Calling LLVM MC assmelber on the following code") @@ -1585,7 +1601,7 @@ def extract(source, lbl, forced_loop_type=None): """ Find a loop with start label `lbl` in `source` and return it together with its type. - + Args: source: list of SourceLine objects lbl: label of the loop to extract diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index 1de96011..35eaceb2 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -277,7 +277,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, # if new_fixup != 0: # yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}" if fixup != 0: - yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" + yield f"{indent}sub.w {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" #if new_fixup != 0 or fixup != 0: if fixup != 0: yield f"{indent}vmov {self.additional_data['endf']}, {self.additional_data['end']}" @@ -383,7 +383,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, yield f"{indent}vmov {loop_end_reg}, {loop_end_reg_fpr}" if fixup != 0: - yield f"{indent}sub {loop_end_reg}, {loop_end_reg}, #{fixup*inc_per_iter}" + yield f"{indent}sub.w {loop_end_reg}, {loop_end_reg}, #{fixup*inc_per_iter}" if fixup != 0 and loop_end_reg_fpr is not None: yield f"{indent}vmov {loop_end_reg_fpr}, {loop_end_reg}" @@ -457,7 +457,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, # yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}" if fixup != 0: - yield f"{indent}sub 
{self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" + yield f"{indent}sub.w {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" if jump_if_empty is not None: yield f"cbz {loop_cnt}, {jump_if_empty}" @@ -499,7 +499,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, assert unroll in [1,2,4,8,16,32] yield f"{indent}lsr {loop_cnt}, {loop_cnt}, #{int(math.log2(unroll))}" if fixup != 0: - yield f"{indent}sub {loop_cnt}, {loop_cnt}, #{fixup}" + yield f"{indent}sub.w {loop_cnt}, {loop_cnt}, #{fixup}" if jump_if_empty is not None: yield f"cbz {loop_cnt}, {jump_if_empty}" yield f"{self.lbl_start}:" @@ -1079,6 +1079,10 @@ def make(cls, src): return Armv7mInstruction.build(cls, src) def write(self): + # Default to .w for all instructions for better performance + # TODO: find a more principled way to do this + self.width = ".w" + out = self.pattern l = list(zip(self.args_in, self.pattern_inputs)) + \ list(zip(self.args_out, self.pattern_outputs)) + \