diff --git a/slothy/helper.py b/slothy/helper.py index 3d386d09..a481e2fc 100644 --- a/slothy/helper.py +++ b/slothy/helper.py @@ -1212,12 +1212,28 @@ def assemble(source, arch, attr, log, symbol=None, preprocessor=None, include_pa include=include_paths) except subprocess.CalledProcessError as exc: log.error("CPreprocessor failed on the following input") - log.error(SouceLine.write_multiline(source)) + log.error(SourceLine.write_multiline(source)) raise LLVM_Mc_Error from exc if platform.system() == "Darwin": source = list(filter(lambda s: s.text.strip().startswith(".type") is False, source)) + + # Remove all width information - LLVM cannot handle .w for + # some instructions that only have a 32-bit encoding, + # e.g., uadd16.w works in gcc, but not LLVM. + # Unfortunately, for some instructions this depends + # on the registers used and, hence, adjusting the input to + # SLOTHY is not sufficient. + # As currently, we don't have a model of the instruction encodings, + # there is no principled way to reason about it. + if thumb: + for line in source: + instruction = line.text + instruction = instruction.replace(".w ", " ") + instruction = instruction.replace(".n ", " ") + line.set_text(instruction) + code = SourceLine.write_multiline(source) log.debug(f"Calling LLVM MC assmelber on the following code") @@ -1585,7 +1601,7 @@ def extract(source, lbl, forced_loop_type=None): """ Find a loop with start label `lbl` in `source` and return it together with its type. - + Args: source: list of SourceLine objects lbl: label of the loop to extract diff --git a/slothy/targets/arm_v7m/arch_v7m.py b/slothy/targets/arm_v7m/arch_v7m.py index 1de96011..35eaceb2 100644 --- a/slothy/targets/arm_v7m/arch_v7m.py +++ b/slothy/targets/arm_v7m/arch_v7m.py @@ -277,7 +277,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, # if new_fixup != 0: # yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}" if fixup != 0: - yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" + yield f"{indent}sub.w {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" #if new_fixup != 0 or fixup != 0: if fixup != 0: yield f"{indent}vmov {self.additional_data['endf']}, {self.additional_data['end']}" @@ -383,7 +383,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, yield f"{indent}vmov {loop_end_reg}, {loop_end_reg_fpr}" if fixup != 0: - yield f"{indent}sub {loop_end_reg}, {loop_end_reg}, #{fixup*inc_per_iter}" + yield f"{indent}sub.w {loop_end_reg}, {loop_end_reg}, #{fixup*inc_per_iter}" if fixup != 0 and loop_end_reg_fpr is not None: yield f"{indent}vmov {loop_end_reg_fpr}, {loop_end_reg}" @@ -457,7 +457,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, # yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{new_fixup}" if fixup != 0: - yield f"{indent}sub {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" + yield f"{indent}sub.w {self.additional_data['end']}, {self.additional_data['end']}, #{fixup*inc_per_iter}" if jump_if_empty is not None: yield f"cbz {loop_cnt}, {jump_if_empty}" @@ -499,7 +499,7 @@ def start(self, loop_cnt, indentation=0, fixup=0, unroll=1, jump_if_empty=None, assert unroll in [1,2,4,8,16,32] yield f"{indent}lsr {loop_cnt}, {loop_cnt}, #{int(math.log2(unroll))}" if fixup != 0: - yield f"{indent}sub {loop_cnt}, {loop_cnt}, #{fixup}" + yield f"{indent}sub.w {loop_cnt}, {loop_cnt}, #{fixup}" if jump_if_empty is not None: yield f"cbz {loop_cnt}, {jump_if_empty}" yield f"{self.lbl_start}:" @@ -1079,6 +1079,10 @@ def make(cls, src): return Armv7mInstruction.build(cls, src) def write(self): + # Default to .w for all instructions for better performance + # TODO: find a more principled way to do this + self.width = ".w" + out = self.pattern l = list(zip(self.args_in, self.pattern_inputs)) + \ list(zip(self.args_out, self.pattern_outputs)) + \