From 0de7b128f64efaa1533ee49a728d83125673b116 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sat, 25 Apr 2020 20:16:48 -0700 Subject: [PATCH 01/54] Add --no-andor for loop.c, and a similar looping test case --- .../end_to_end/loop/irix-o2-noandor-flags.txt | 1 + tests/end_to_end/loop/irix-o2-noandor-out.c | 61 +++++++++++++++++++ tests/end_to_end/loop/irix-o2-noandor.s | 34 +++++++++++ tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 1 + tests/end_to_end/mk64_unknown_1/irix-o2.s | 53 ++++++++++++++++ 5 files changed, 150 insertions(+) create mode 100644 tests/end_to_end/loop/irix-o2-noandor-flags.txt create mode 100644 tests/end_to_end/loop/irix-o2-noandor-out.c create mode 100644 tests/end_to_end/loop/irix-o2-noandor.s create mode 100644 tests/end_to_end/mk64_unknown_1/irix-o2-out.c create mode 100644 tests/end_to_end/mk64_unknown_1/irix-o2.s diff --git a/tests/end_to_end/loop/irix-o2-noandor-flags.txt b/tests/end_to_end/loop/irix-o2-noandor-flags.txt new file mode 100644 index 00000000..751e7042 --- /dev/null +++ b/tests/end_to_end/loop/irix-o2-noandor-flags.txt @@ -0,0 +1 @@ +--no-andor diff --git a/tests/end_to_end/loop/irix-o2-noandor-out.c b/tests/end_to_end/loop/irix-o2-noandor-out.c new file mode 100644 index 00000000..d1db09bb --- /dev/null +++ b/tests/end_to_end/loop/irix-o2-noandor-out.c @@ -0,0 +1,61 @@ +s32 test(void *arg0, s32 arg1) +{ + s32 temp_a3; + s32 temp_v0; + s32 temp_v0_2; + void *temp_v1; + void *phi_v1; + s32 phi_v0; + void *phi_v1_2; + s32 phi_v0_2; + s32 phi_return; + s32 phi_v0_3; + + phi_return = 0; + if (arg1 > 0) + { + temp_a3 = arg1 & 3; + phi_v0_3 = 0; + if (temp_a3 != 0) + { + phi_v1 = arg0; + phi_v0 = 0; +loop_3: + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + if (temp_a3 != temp_v0) + { + goto loop_3; + } + phi_return = temp_v0; + phi_v0_3 = temp_v0; + if (temp_v0 != arg1) + { +block_5: + phi_v1_2 = arg0 + phi_v0_3; + phi_v0_2 = phi_v0_3; +loop_6: + temp_v0_2 = phi_v0_2 + 4; + phi_v1_2->unk1 = (u8)0; + phi_v1_2->unk2 = (u8)0; + phi_v1_2->unk3 = (u8)0; + temp_v1 = phi_v1_2 + 4; + temp_v1->unk-4 = (u8)0; + phi_v1_2 = temp_v1; + phi_v0_2 = temp_v0_2; + phi_return = temp_v0_2; + if (temp_v0_2 != arg1) + { + goto loop_6; + } + } + } + else + { + goto block_5; + } + } + return phi_return; +} diff --git a/tests/end_to_end/loop/irix-o2-noandor.s b/tests/end_to_end/loop/irix-o2-noandor.s new file mode 100644 index 00000000..4d72661c --- /dev/null +++ b/tests/end_to_end/loop/irix-o2-noandor.s @@ -0,0 +1,34 @@ +.set noat # allow manual use of $at +.set noreorder # don't insert nops after branches + + +glabel test +/* 000090 00400090 18A00012 */ blez $a1, .L004000DC +/* 000094 00400094 00001025 */ move $v0, $zero +/* 000098 00400098 30A70003 */ andi $a3, $a1, 3 +/* 00009C 0040009C 10E00007 */ beqz $a3, .L004000BC +/* 0000A0 004000A0 00E03025 */ move $a2, $a3 +/* 0000A4 004000A4 00801821 */ move $v1, $a0 +.L004000A8: +/* 0000A8 004000A8 24420001 */ addiu $v0, $v0, 1 +/* 0000AC 004000AC A0600000 */ sb $zero, ($v1) +/* 0000B0 004000B0 14C2FFFD */ bne $a2, $v0, .L004000A8 +/* 0000B4 004000B4 24630001 */ addiu $v1, $v1, 1 +/* 0000B8 004000B8 10450008 */ beq $v0, $a1, .L004000DC +.L004000BC: +/* 0000BC 004000BC 00821821 */ addu $v1, $a0, $v0 +.L004000C0: +/* 0000C0 004000C0 24420004 */ addiu $v0, $v0, 4 +/* 0000C4 004000C4 A0600001 */ sb $zero, 1($v1) +/* 0000C8 004000C8 A0600002 */ sb $zero, 2($v1) +/* 0000CC 004000CC A0600003 */ sb $zero, 3($v1) +/* 0000D0 004000D0 24630004 */ addiu $v1, $v1, 4 +/* 0000D4 004000D4 1445FFFA */ bne $v0, $a1, .L004000C0 +/* 0000D8 004000D8 A060FFFC */ sb $zero, -4($v1) +.L004000DC: +/* 0000DC 004000DC 03E00008 */ jr $ra +/* 0000E0 004000E0 00000000 */ nop + +/* 0000E4 004000E4 00000000 */ nop +/* 0000E8 004000E8 00000000 */ nop +/* 0000EC 004000EC 00000000 */ nop diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c new file mode 100644 index 00000000..1cb4f301 --- /dev/null +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -0,0 +1 @@ +CRASHED diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2.s b/tests/end_to_end/mk64_unknown_1/irix-o2.s new file mode 100644 index 00000000..ffe00539 --- /dev/null +++ b/tests/end_to_end/mk64_unknown_1/irix-o2.s @@ -0,0 +1,53 @@ +.set noat # allow manual use of $at +.set noreorder # don't insert nops after branches + + +glabel test +/* 11837C 802AED6C 00047602 */ srl $t6, $a0, 0x18 +/* 118380 802AED70 000E7880 */ sll $t7, $t6, 2 +/* 118384 802AED74 3C188015 */ lui $t8, 0x8015 +/* 118388 802AED78 030FC021 */ addu $t8, $t8, $t7 +/* 11838C 802AED7C 3C0100FF */ lui $at, (0x00FFFFFF >> 16) # lui $at, 0xff +/* 118390 802AED80 8F180258 */ lw $t8, 0x258($t8) +/* 118394 802AED84 3421FFFF */ ori $at, (0x00FFFFFF & 0xFFFF) # ori $at, $at, 0xffff +/* 118398 802AED88 0081C824 */ and $t9, $a0, $at +/* 11839C 802AED8C 3C018000 */ lui $at, 0x8000 +/* 1183A0 802AED90 03191021 */ addu $v0, $t8, $t9 +/* 1183A4 802AED94 00411021 */ addu $v0, $v0, $at +/* 1183A8 802AED98 10A0001E */ beqz $a1, .L802AEE14 +/* 1183AC 802AED9C 00001825 */ move $v1, $zero +/* 1183B0 802AEDA0 30A80003 */ andi $t0, $a1, 3 +/* 1183B4 802AEDA4 1100000C */ beqz $t0, .L802AEDD8 +/* 1183B8 802AEDA8 01003825 */ move $a3, $t0 +/* 1183BC 802AEDAC 3C0A8016 */ lui $t2, %hi(D_8015F668) # $t2, 0x8016 +/* 1183C0 802AEDB0 254AF668 */ addiu $t2, %lo(D_8015F668) # addiu $t2, $t2, -0x998 +/* 1183C4 802AEDB4 00064880 */ sll $t1, $a2, 2 +/* 1183C8 802AEDB8 012A2021 */ addu $a0, $t1, $t2 +.L802AEDBC: +/* 1183CC 802AEDBC AC820000 */ sw $v0, ($a0) +/* 1183D0 802AEDC0 24630001 */ addiu $v1, $v1, 1 +/* 1183D4 802AEDC4 24420010 */ addiu $v0, $v0, 0x10 +/* 1183D8 802AEDC8 24C60001 */ addiu $a2, $a2, 1 +/* 1183DC 802AEDCC 14E3FFFB */ bne $a3, $v1, .L802AEDBC +/* 1183E0 802AEDD0 24840004 */ addiu $a0, $a0, 4 +/* 1183E4 802AEDD4 1065000F */ beq $v1, $a1, .L802AEE14 +.L802AEDD8: +/* 1183E8 802AEDD8 3C0C8016 */ lui $t4, %hi(D_8015F668) # $t4, 0x8016 +/* 1183EC 802AEDDC 258CF668 */ addiu $t4, %lo(D_8015F668) # addiu $t4, $t4, -0x998 +/* 1183F0 802AEDE0 00065880 */ sll $t3, $a2, 2 +/* 1183F4 802AEDE4 016C2021 */ addu $a0, $t3, $t4 +.L802AEDE8: +/* 1183F8 802AEDE8 AC820000 */ sw $v0, ($a0) +/* 1183FC 802AEDEC 24420010 */ addiu $v0, $v0, 0x10 +/* 118400 802AEDF0 AC820004 */ sw $v0, 4($a0) +/* 118404 802AEDF4 24420010 */ addiu $v0, $v0, 0x10 +/* 118408 802AEDF8 AC820008 */ sw $v0, 8($a0) +/* 11840C 802AEDFC 24420010 */ addiu $v0, $v0, 0x10 +/* 118410 802AEE00 AC82000C */ sw $v0, 0xc($a0) +/* 118414 802AEE04 24630004 */ addiu $v1, $v1, 4 +/* 118418 802AEE08 24420010 */ addiu $v0, $v0, 0x10 +/* 11841C 802AEE0C 1465FFF6 */ bne $v1, $a1, .L802AEDE8 +/* 118420 802AEE10 24840010 */ addiu $a0, $a0, 0x10 +.L802AEE14: +/* 118424 802AEE14 03E00008 */ jr $ra +/* 118428 802AEE18 00000000 */ nop \ No newline at end of file From 9d10258d26ecf347f29142d3e248b10dcb5d5e32 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sat, 25 Apr 2020 21:52:16 -0700 Subject: [PATCH 02/54] Initial infrastructure for detecting for-loops --- src/if_statements.py | 107 +++++++++++++++++- tests/end_to_end/loop/irix-g-out.c | 11 +- tests/end_to_end/loop_nested/irix-g-out.c | 10 +- .../end_to_end/multiple-assigns/irix-g-out.c | 24 +--- 4 files changed, 111 insertions(+), 41 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 8ccb78bc..9a73930d 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -1,4 +1,4 @@ -from typing import Dict, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union import attr @@ -24,6 +24,7 @@ simplify_condition, stringify_expr, ) +from .translate import Statement as TranslateStatement @attr.s @@ -106,7 +107,46 @@ def __str__(self) -> str: return "\n".join(lines) -Statement = Union[SimpleStatement, IfElseStatement, LabelStatement] +@attr.s +class ForLoop: + indent: int = attr.ib() + coding_style: CodingStyle = attr.ib() + + end_node: Node = attr.ib() + body: "Body" = attr.ib() + + initialization: Optional[List[TranslateStatement]] = attr.ib(default=None) + condition: Optional[Condition] = attr.ib(default=None) + afterthought: Optional[List[TranslateStatement]] = attr.ib(default=None) + + def should_write(self) -> bool: + return True + + def __str__(self) -> str: + space = " " * self.indent + + brace_after_if = f"\n{space}{{" if self.coding_style.newline_after_if else " {" + + init = ( + ", ".join(str(stmt).rstrip(";") for stmt in self.initialization) + if self.initialization + else "" + ) + cond = str(self.condition).rstrip(";") if self.condition else "" + after = ( + ", ".join(str(stmt).rstrip(";") for stmt in self.afterthought) + if self.afterthought + else "" + ) + string_components = [ + f"{space}for ({init}; {cond}; {after}){brace_after_if}", + str(self.body), # has its own indentation + f"{space}}}", + ] + return "\n".join(string_components) + + +Statement = Union[SimpleStatement, IfElseStatement, LabelStatement, ForLoop] @attr.s @@ -136,6 +176,9 @@ def add_comment(self, indent: int, contents: str) -> None: def add_if_else(self, if_else: IfElseStatement) -> None: self.statements.append(if_else) + def add_for_loop(self, for_loop: ForLoop) -> None: + self.statements.append(for_loop) + def __str__(self) -> str: return "\n".join( str(statement) for statement in self.statements if statement.should_write() @@ -514,6 +557,57 @@ def add_return_statement( body.add_statement(SimpleStatement(indent, "return;")) +def pattern_match_against_for_loop( + context: Context, start: ConditionalNode, indent: int +) -> Optional[ForLoop]: + node_1 = start.fallthrough_edge + node_2 = start.conditional_edge + + if not ( + isinstance(node_1, ConditionalNode) + and node_1.is_loop() + # TODO: could also use id() here?: + and node_1.conditional_edge.block.index == node_1.block.index + and node_1.fallthrough_edge.block.index == node_2.block.index + ): + return None + + # indent: int = attr.ib() + # coding_style: CodingStyle = attr.ib() + + # initialization: Expression = attr.ib() + # condition: Condition = attr.ib() # TODO: can this be an expression too? + # afterthought: Expression = attr.ib() + # body: "Body" = attr.ib() + + # end_node: Node = attr.ib() + + initialization_statements = [ + statement + for statement in start.block.block_info.to_write + if statement.should_write() + ] + + condition_statements = [ + statement + for statement in node_1.block.block_info.to_write + if statement.should_write() + ] + condition = CommaConditionExpr( + condition_statements, node_1.block.block_info.branch_condition + ) + + return ForLoop( + indent, + context.options.coding_style, + node_2, + Body(False, []), + initialization_statements, + condition, + None, + ) + + def build_flowgraph_between( context: Context, start: Node, end: Node, indent: int ) -> Body: @@ -592,6 +686,15 @@ def build_flowgraph_between( # In a BasicNode, the successor is the next articulation node. curr_start = curr_start.successor elif isinstance(curr_start, ConditionalNode): + # Before we do anything else, we pattern-match the subgraph + # rooted at curr_start against certain predefined subgraphs + # that emit for-loops: + for_loop = pattern_match_against_for_loop(context, curr_start, indent) + if for_loop: + body.add_for_loop(for_loop) + curr_start = for_loop.end_node + continue + # A ConditionalNode means we need to find the next articulation # node. This means we need to find the "immediate postdominator" # of the current node, where "postdominator" means we have to go diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index 3402bcfd..188395ba 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -4,15 +4,8 @@ void test(s32 arg0, s32 arg1) s32 temp_t9; sp4 = 0; - if (arg1 > 0) + for (sp4 = 0; (*(arg0 + sp4) = (u8)0, temp_t9 = sp4 + 1, sp4 = temp_t9, ((temp_t9 < arg1) != 0)); ) { -loop_1: - *(arg0 + sp4) = (u8)0; - temp_t9 = sp4 + 1; - sp4 = temp_t9; - if (temp_t9 < arg1) - { - goto loop_1; - } + } } diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 4fd02b8b..cc6cfc55 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -10,15 +10,9 @@ s32 test(s32 arg0) { loop_1: sp4 = 0; - if (sp4 < arg0) + for (sp4 = 0; (sp8 = sp8 + (spC * sp4), sp4 = sp4 + 1, ((sp4 < arg0) != 0)); ) { -loop_2: - sp8 = sp8 + (spC * sp4); - sp4 = sp4 + 1; - if (sp4 < arg0) - { - goto loop_2; - } + } spC = spC + 1; if (spC < arg0) diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index 2908f3c6..edd9bb83 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -5,29 +5,9 @@ s32 test(s32 arg0) s32 phi_a0; phi_a0 = arg0; - if (arg0 == 5) + for (phi_a0 = arg0; (D_410150 = (s32) phi_a0, temp_a0 = phi_a0 + 1, D_410150 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = temp_a0, sp4 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = temp_a0, D_410150 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = sp4, phi_a0 = temp_a0, (temp_a0 == 5)); ) { -loop_1: - D_410150 = (s32) phi_a0; - temp_a0 = phi_a0 + 1; - D_410150 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = temp_a0; - sp4 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = temp_a0; - D_410150 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = sp4; - phi_a0 = temp_a0; - if (temp_a0 == 5) - { - goto loop_1; - } + } return sp4; } From d394e9b82e8fbcb7ee94f6c52006f3accb31ee7a Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 10:48:56 -0700 Subject: [PATCH 03/54] Use do-while, not for; screwed up indentation --- src/if_statements.py | 77 ++++++++----------- tests/end_to_end/loop/irix-g-out.c | 10 ++- tests/end_to_end/loop_nested/irix-g-out.c | 9 ++- .../end_to_end/multiple-assigns/irix-g-out.c | 23 +++++- 4 files changed, 69 insertions(+), 50 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 9a73930d..5449d4a9 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -108,45 +108,46 @@ def __str__(self) -> str: @attr.s -class ForLoop: +class DoWhileLoop: indent: int = attr.ib() coding_style: CodingStyle = attr.ib() end_node: Node = attr.ib() body: "Body" = attr.ib() - initialization: Optional[List[TranslateStatement]] = attr.ib(default=None) + initialization: List[TranslateStatement] = attr.ib(factory=list) condition: Optional[Condition] = attr.ib(default=None) - afterthought: Optional[List[TranslateStatement]] = attr.ib(default=None) def should_write(self) -> bool: return True def __str__(self) -> str: space = " " * self.indent - brace_after_if = f"\n{space}{{" if self.coding_style.newline_after_if else " {" - - init = ( - ", ".join(str(stmt).rstrip(";") for stmt in self.initialization) - if self.initialization - else "" + brace_after_do = ( + f"\n{space * 2}{{" if self.coding_style.newline_after_if else " {" ) + + init = "\n".join(str(stmt) for stmt in self.initialization) cond = str(self.condition).rstrip(";") if self.condition else "" - after = ( - ", ".join(str(stmt).rstrip(";") for stmt in self.afterthought) - if self.afterthought - else "" - ) string_components = [ - f"{space}for ({init}; {cond}; {after}){brace_after_if}", - str(self.body), # has its own indentation + f"{space}{init}", + f"{space}if ({cond}){brace_after_if}", + f"{space * 2}do{brace_after_do}", + "\n".join(f"{space * 3}{stmt}" for stmt in self.body.statements), + f"{space * 2}}} while ({cond})", f"{space}}}", ] + # Remnant of for-loops, will eventually resurrect + # string_components = [ + # f"{space}for ({init}; {cond}; {after}){brace_after_if}", + # str(self.body), # has its own indentation + # f"{space}}}", + # ] return "\n".join(string_components) -Statement = Union[SimpleStatement, IfElseStatement, LabelStatement, ForLoop] +Statement = Union[SimpleStatement, IfElseStatement, LabelStatement, DoWhileLoop] @attr.s @@ -176,8 +177,8 @@ def add_comment(self, indent: int, contents: str) -> None: def add_if_else(self, if_else: IfElseStatement) -> None: self.statements.append(if_else) - def add_for_loop(self, for_loop: ForLoop) -> None: - self.statements.append(for_loop) + def add_do_while_loop(self, do_while_loop: DoWhileLoop) -> None: + self.statements.append(do_while_loop) def __str__(self) -> str: return "\n".join( @@ -557,9 +558,9 @@ def add_return_statement( body.add_statement(SimpleStatement(indent, "return;")) -def pattern_match_against_for_loop( +def pattern_match_against_do_while_loop( context: Context, start: ConditionalNode, indent: int -) -> Optional[ForLoop]: +) -> Optional[DoWhileLoop]: node_1 = start.fallthrough_edge node_2 = start.conditional_edge @@ -572,39 +573,25 @@ def pattern_match_against_for_loop( ): return None - # indent: int = attr.ib() - # coding_style: CodingStyle = attr.ib() - - # initialization: Expression = attr.ib() - # condition: Condition = attr.ib() # TODO: can this be an expression too? - # afterthought: Expression = attr.ib() - # body: "Body" = attr.ib() - - # end_node: Node = attr.ib() - initialization_statements = [ statement for statement in start.block.block_info.to_write if statement.should_write() ] - condition_statements = [ + body_statements = [ statement for statement in node_1.block.block_info.to_write if statement.should_write() ] - condition = CommaConditionExpr( - condition_statements, node_1.block.block_info.branch_condition - ) - return ForLoop( + return DoWhileLoop( indent, context.options.coding_style, node_2, - Body(False, []), + Body(False, body_statements), initialization_statements, - condition, - None, + node_1.block.block_info.branch_condition, ) @@ -688,11 +675,13 @@ def build_flowgraph_between( elif isinstance(curr_start, ConditionalNode): # Before we do anything else, we pattern-match the subgraph # rooted at curr_start against certain predefined subgraphs - # that emit for-loops: - for_loop = pattern_match_against_for_loop(context, curr_start, indent) - if for_loop: - body.add_for_loop(for_loop) - curr_start = for_loop.end_node + # that emit do-while-loops: + do_while_loop = pattern_match_against_do_while_loop( + context, curr_start, indent + ) + if do_while_loop: + body.add_do_while_loop(do_while_loop) + curr_start = do_while_loop.end_node continue # A ConditionalNode means we need to find the next articulation diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index 188395ba..6390f2a8 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -4,8 +4,14 @@ void test(s32 arg0, s32 arg1) s32 temp_t9; sp4 = 0; - for (sp4 = 0; (*(arg0 + sp4) = (u8)0, temp_t9 = sp4 + 1, sp4 = temp_t9, ((temp_t9 < arg1) != 0)); ) + sp4 = 0; + if (((temp_t9 < arg1) != 0)) { - + do + { + *(arg0 + sp4) = (u8)0; + temp_t9 = sp4 + 1; + sp4 = temp_t9; + } while (((temp_t9 < arg1) != 0)) } } diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index cc6cfc55..7fd80617 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -10,9 +10,14 @@ s32 test(s32 arg0) { loop_1: sp4 = 0; - for (sp4 = 0; (sp8 = sp8 + (spC * sp4), sp4 = sp4 + 1, ((sp4 < arg0) != 0)); ) + sp4 = 0; + if (((sp4 < arg0) != 0)) { - + do + { + sp8 = sp8 + (spC * sp4); + sp4 = sp4 + 1; + } while (((sp4 < arg0) != 0)) } spC = spC + 1; if (spC < arg0) diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index edd9bb83..6bff032b 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -5,9 +5,28 @@ s32 test(s32 arg0) s32 phi_a0; phi_a0 = arg0; - for (phi_a0 = arg0; (D_410150 = (s32) phi_a0, temp_a0 = phi_a0 + 1, D_410150 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = temp_a0, sp4 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = temp_a0, D_410150 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = temp_a0, temp_a0 = temp_a0 + 1, D_410150 = sp4, phi_a0 = temp_a0, (temp_a0 == 5)); ) + phi_a0 = arg0; + if ((temp_a0 == 5)) { - + do + { + D_410150 = (s32) phi_a0; + temp_a0 = phi_a0 + 1; + D_410150 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = temp_a0; + sp4 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = temp_a0; + D_410150 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = sp4; + phi_a0 = temp_a0; + } while ((temp_a0 == 5)) } return sp4; } From ad167f70a8924ecba6d410e2e21f0742f4c5a50e Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 11:01:41 -0700 Subject: [PATCH 04/54] Fix screwed up indentation --- src/if_statements.py | 10 ++++++---- tests/end_to_end/loop_nested/irix-g-out.c | 10 +++++----- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 5449d4a9..f0c028ab 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -123,9 +123,11 @@ def should_write(self) -> bool: def __str__(self) -> str: space = " " * self.indent + space_2 = " " * (self.indent + 4) + space_3 = " " * (self.indent + 8) brace_after_if = f"\n{space}{{" if self.coding_style.newline_after_if else " {" brace_after_do = ( - f"\n{space * 2}{{" if self.coding_style.newline_after_if else " {" + f"\n{space_2}{{" if self.coding_style.newline_after_if else " {" ) init = "\n".join(str(stmt) for stmt in self.initialization) @@ -133,9 +135,9 @@ def __str__(self) -> str: string_components = [ f"{space}{init}", f"{space}if ({cond}){brace_after_if}", - f"{space * 2}do{brace_after_do}", - "\n".join(f"{space * 3}{stmt}" for stmt in self.body.statements), - f"{space * 2}}} while ({cond})", + f"{space_2}do{brace_after_do}", + "\n".join(f"{space_3}{stmt}" for stmt in self.body.statements), + f"{space_2}}} while ({cond})", f"{space}}}", ] # Remnant of for-loops, will eventually resurrect diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 7fd80617..7e5386ee 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -13,11 +13,11 @@ s32 test(s32 arg0) sp4 = 0; if (((sp4 < arg0) != 0)) { - do - { - sp8 = sp8 + (spC * sp4); - sp4 = sp4 + 1; - } while (((sp4 < arg0) != 0)) + do + { + sp8 = sp8 + (spC * sp4); + sp4 = sp4 + 1; + } while (((sp4 < arg0) != 0)) } spC = spC + 1; if (spC < arg0) From 8d99b31dbd4d5bcecdbc3f31e2ce595549cff837 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 14:18:34 -0700 Subject: [PATCH 05/54] Add complicated loop unrolling; break -g indentation --- src/if_statements.py | 189 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 182 insertions(+), 7 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index f0c028ab..0e16ec4f 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -20,11 +20,9 @@ Condition, Expression, FunctionInfo, - Type, - simplify_condition, - stringify_expr, ) from .translate import Statement as TranslateStatement +from .translate import Type, simplify_condition, stringify_expr @attr.s @@ -132,11 +130,11 @@ def __str__(self) -> str: init = "\n".join(str(stmt) for stmt in self.initialization) cond = str(self.condition).rstrip(";") if self.condition else "" + body = f"\n".join(f"{stmt}" for stmt in self.body.statements) string_components = [ f"{space}{init}", f"{space}if ({cond}){brace_after_if}", - f"{space_2}do{brace_after_do}", - "\n".join(f"{space_3}{stmt}" for stmt in self.body.statements), + f"{space_2}do{brace_after_do}{body}", f"{space_2}}} while ({cond})", f"{space}}}", ] @@ -149,6 +147,18 @@ def __str__(self) -> str: return "\n".join(string_components) +@attr.s +class UnrolledLoop: + indent: int = attr.ib() + coding_style: CodingStyle = attr.ib() + + end_node: Node = attr.ib() + body: "Body" = attr.ib() + + initialization: List[TranslateStatement] = attr.ib(factory=list) + condition: Optional[Condition] = attr.ib(default=None) + + Statement = Union[SimpleStatement, IfElseStatement, LabelStatement, DoWhileLoop] @@ -211,6 +221,12 @@ def emit_goto(context: Context, target: Node, body: Body, indent: int) -> None: body.add_statement(SimpleStatement(indent, f"goto {label};")) +def create_goto(context: Context, target: Node, indent: int) -> SimpleStatement: + label = label_for_node(context, target) + context.goto_nodes.add(target) + return SimpleStatement(indent, f"goto {label};") + + def emit_switch_jump( context: Context, expr: Expression, body: Body, indent: int ) -> None: @@ -560,7 +576,7 @@ def add_return_statement( body.add_statement(SimpleStatement(indent, "return;")) -def pattern_match_against_do_while_loop( +def pattern_match_against_simple_do_while_loop( context: Context, start: ConditionalNode, indent: int ) -> Optional[DoWhileLoop]: node_1 = start.fallthrough_edge @@ -597,6 +613,155 @@ def pattern_match_against_do_while_loop( ) +def pattern_match_against_unrolled_while_loop( + context: Context, start: ConditionalNode, indent: int +) -> Optional[Tuple[Node, IfElseStatement, Node]]: + node_1 = start.fallthrough_edge + node_7 = start.conditional_edge + + if not isinstance(node_1, ConditionalNode): + return None + node_2 = node_1.fallthrough_edge + node_5 = node_1.conditional_edge + + if not isinstance(node_2, BasicNode): + return None + node_3 = node_2.successor + + if not ( + isinstance(node_3, ConditionalNode) + and node_3.is_loop() + and node_3.conditional_edge.block.index == node_3.block.index + ): + return None + node_4 = node_3.fallthrough_edge + + if not ( + isinstance(node_4, ConditionalNode) + and node_4.fallthrough_edge.block.index == node_5.block.index + and node_4.conditional_edge.block.index == node_7.block.index + ): + return None + + if not isinstance(node_5, BasicNode): + return None + node_6 = node_5.successor + + if not ( + isinstance(node_6, ConditionalNode) + and node_6.is_loop() + and node_6.conditional_edge.block.index == node_6.block.index + and node_6.fallthrough_edge.block.index == node_7.block.index + ): + return None + + # for (i = 0; i < length; i++) { + # // code + # } + # + # becomes + # + # for (i = 0; i < (length % 4); i++) { + # // code in node 3 + # } + # for (i = (length % 4); i < length; i += 4) { + # // code, repeated for i through i + 3 + # // aka code in node 6 + # } + # + # which, much futzing later, becomes + # + # [node_0] + # if ([node_0.condition]) + # { + # [node_1] + # if (![node_1.condition]) + # { + # [node_2] + # while ([node_3.condition]) { + # [node_3] + # } + # [node_4] + # if ([node_4.condition]) { + # goto label_7 + # } + # } + # [node_5] + # while ([node_6.condition]) { + # [node_6] + # } + # } + # label_7: + # [node_7] + + assert isinstance(start.block.block_info, BlockInfo) + assert isinstance(node_1.block.block_info, BlockInfo) + assert isinstance(node_3.block.block_info, BlockInfo) + assert isinstance(node_4.block.block_info, BlockInfo) + assert isinstance(node_6.block.block_info, BlockInfo) + assert start.block.block_info.branch_condition + assert node_1.block.block_info.branch_condition + assert node_3.block.block_info.branch_condition + assert node_4.block.block_info.branch_condition + assert node_6.block.block_info.branch_condition + + main_body = Body(False, []) + emit_node(context, node_1, main_body, indent + 4) + + first_loop_metabody = Body(False, []) + emit_node(context, node_2, first_loop_metabody, indent + 8) + first_loop_body = Body(False, []) + emit_node(context, node_3, first_loop_body, indent + 16) + first_loop_metabody.add_statement( + DoWhileLoop( + indent + 8, + context.options.coding_style, + node_4, + first_loop_body, + [], + node_3.block.block_info.branch_condition, + ) + ) + emit_node(context, node_4, first_loop_metabody, indent + 8) + first_loop_metabody.add_statement( + IfElseStatement( + node_4.block.block_info.branch_condition, + indent + 8, + context.options.coding_style, + Body(False, [create_goto(context, node_7, indent + 12)]), + ) + ) + + first_loop_if = IfElseStatement( + node_1.block.block_info.branch_condition.negated(), + indent + 4, + context.options.coding_style, + first_loop_metabody, + ) + main_body.add_statement(first_loop_if) + emit_node(context, node_5, main_body, indent + 4) + second_loop_body = Body(False, []) + emit_node(context, node_6, second_loop_body, indent + 12) + main_body.add_statement( + DoWhileLoop( + indent + 4, + context.options.coding_style, + node_7, + second_loop_body, + [], + node_6.block.block_info.branch_condition, + ), + ) + + should_loop = IfElseStatement( + start.block.block_info.branch_condition, + indent, + context.options.coding_style, + main_body, + ) + return (start, should_loop, node_7) + + def build_flowgraph_between( context: Context, start: Node, end: Node, indent: int ) -> Body: @@ -678,7 +843,7 @@ def build_flowgraph_between( # Before we do anything else, we pattern-match the subgraph # rooted at curr_start against certain predefined subgraphs # that emit do-while-loops: - do_while_loop = pattern_match_against_do_while_loop( + do_while_loop = pattern_match_against_simple_do_while_loop( context, curr_start, indent ) if do_while_loop: @@ -686,6 +851,16 @@ def build_flowgraph_between( curr_start = do_while_loop.end_node continue + # Same thing for giant unrolled loops: + unrolled_loop = pattern_match_against_unrolled_while_loop( + context, curr_start, indent + ) + if unrolled_loop: + (_, loop_if_statement, curr_end) = unrolled_loop + body.add_if_else(loop_if_statement) + curr_start = curr_end + continue + # A ConditionalNode means we need to find the next articulation # node. This means we need to find the "immediate postdominator" # of the current node, where "postdominator" means we have to go From 24f5e2f95eb7342cbbd39a9c9811ce836f4212ea Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 15:43:49 -0700 Subject: [PATCH 06/54] Actually run tests --- tests/end_to_end/loop/irix-g-out.c | 7 +- tests/end_to_end/loop/irix-o2-noandor-out.c | 45 ++++----- tests/end_to_end/loop/irix-o2-out.c | 63 +++++++++++- tests/end_to_end/loop_nested/irix-g-out.c | 5 +- tests/end_to_end/loop_nested/irix-o2-out.c | 95 ++++++++++++++++++- tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 83 +++++++++++++++- .../end_to_end/multiple-assigns/irix-g-out.c | 33 ++++--- 7 files changed, 282 insertions(+), 49 deletions(-) diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index 6390f2a8..4ac30748 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -8,10 +8,9 @@ void test(s32 arg0, s32 arg1) if (((temp_t9 < arg1) != 0)) { do - { - *(arg0 + sp4) = (u8)0; - temp_t9 = sp4 + 1; - sp4 = temp_t9; + {*(arg0 + sp4) = (u8)0; +temp_t9 = sp4 + 1; +sp4 = temp_t9; } while (((temp_t9 < arg1) != 0)) } } diff --git a/tests/end_to_end/loop/irix-o2-noandor-out.c b/tests/end_to_end/loop/irix-o2-noandor-out.c index d1db09bb..8133a260 100644 --- a/tests/end_to_end/loop/irix-o2-noandor-out.c +++ b/tests/end_to_end/loop/irix-o2-noandor-out.c @@ -12,7 +12,7 @@ s32 test(void *arg0, s32 arg1) s32 phi_v0_3; phi_return = 0; - if (arg1 > 0) + if (arg1 <= 0) { temp_a3 = arg1 & 3; phi_v0_3 = 0; @@ -20,23 +20,31 @@ s32 test(void *arg0, s32 arg1) { phi_v1 = arg0; phi_v0 = 0; -loop_3: - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - if (temp_a3 != temp_v0) + + if ((temp_a3 != temp_v0)) { - goto loop_3; + do + { + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + } while ((temp_a3 != temp_v0)) } phi_return = temp_v0; phi_v0_3 = temp_v0; - if (temp_v0 != arg1) + if (temp_v0 == arg1) + { + goto block_7; + } + } + phi_v1_2 = arg0 + phi_v0_3; + phi_v0_2 = phi_v0_3; + + if ((temp_v0_2 != arg1)) + { + do { -block_5: - phi_v1_2 = arg0 + phi_v0_3; - phi_v0_2 = phi_v0_3; -loop_6: temp_v0_2 = phi_v0_2 + 4; phi_v1_2->unk1 = (u8)0; phi_v1_2->unk2 = (u8)0; @@ -46,16 +54,9 @@ s32 test(void *arg0, s32 arg1) phi_v1_2 = temp_v1; phi_v0_2 = temp_v0_2; phi_return = temp_v0_2; - if (temp_v0_2 != arg1) - { - goto loop_6; - } - } - } - else - { - goto block_5; + } while ((temp_v0_2 != arg1)) } } +block_7: return phi_return; } diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index 1cb4f301..8133a260 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -1 +1,62 @@ -CRASHED +s32 test(void *arg0, s32 arg1) +{ + s32 temp_a3; + s32 temp_v0; + s32 temp_v0_2; + void *temp_v1; + void *phi_v1; + s32 phi_v0; + void *phi_v1_2; + s32 phi_v0_2; + s32 phi_return; + s32 phi_v0_3; + + phi_return = 0; + if (arg1 <= 0) + { + temp_a3 = arg1 & 3; + phi_v0_3 = 0; + if (temp_a3 != 0) + { + phi_v1 = arg0; + phi_v0 = 0; + + if ((temp_a3 != temp_v0)) + { + do + { + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + } while ((temp_a3 != temp_v0)) + } + phi_return = temp_v0; + phi_v0_3 = temp_v0; + if (temp_v0 == arg1) + { + goto block_7; + } + } + phi_v1_2 = arg0 + phi_v0_3; + phi_v0_2 = phi_v0_3; + + if ((temp_v0_2 != arg1)) + { + do + { + temp_v0_2 = phi_v0_2 + 4; + phi_v1_2->unk1 = (u8)0; + phi_v1_2->unk2 = (u8)0; + phi_v1_2->unk3 = (u8)0; + temp_v1 = phi_v1_2 + 4; + temp_v1->unk-4 = (u8)0; + phi_v1_2 = temp_v1; + phi_v0_2 = temp_v0_2; + phi_return = temp_v0_2; + } while ((temp_v0_2 != arg1)) + } + } +block_7: + return phi_return; +} diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 7e5386ee..5a299d9c 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -14,9 +14,8 @@ s32 test(s32 arg0) if (((sp4 < arg0) != 0)) { do - { - sp8 = sp8 + (spC * sp4); - sp4 = sp4 + 1; + {sp8 = sp8 + (spC * sp4); +sp4 = sp4 + 1; } while (((sp4 < arg0) != 0)) } spC = spC + 1; diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index 1cb4f301..e90b538a 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -1 +1,94 @@ -CRASHED +s32 test(s32 arg0) +{ + s32 temp_a1; + s32 temp_a1_2; + s32 temp_t1; + s32 temp_v0; + s32 temp_v1; + s32 temp_v1_2; + s32 phi_a3; + s32 phi_a1; + s32 phi_v0; + s32 phi_v1; + s32 phi_a1_2; + s32 phi_v1_2; + s32 phi_v1_3; + s32 phi_a2; + s32 phi_a3_2; + s32 phi_t0; + s32 phi_t1; + s32 phi_v1_4; + s32 phi_a2_2; + s32 phi_v1_5; + + phi_v0 = 0; + phi_v1 = 0; + phi_v1_5 = 0; + if (arg0 > 0) + { +loop_1: + phi_v1_2 = phi_v1_5; + if (arg0 <= 0) + { + temp_t1 = arg0 & 3; + phi_a1_2 = 0; + phi_v1_3 = phi_v1_5; + if (temp_t1 != 0) + { + phi_a3 = 1; + phi_v1_4 = phi_v1_5; + phi_a2_2 = phi_v0 * 0; + + if ((temp_t1 != phi_a3)) + { + do + { + temp_a1 = phi_a3; + temp_v1 = phi_v1_4 + phi_a2_2; + phi_a3 = phi_a3 + 1; + phi_v1_4 = temp_v1; + phi_a2_2 = phi_a2_2 + phi_v0; + } while ((temp_t1 != phi_a3)) + } + phi_a1_2 = temp_a1; + phi_v1_2 = temp_v1; + phi_v1_3 = temp_v1; + if (temp_a1 == arg0) + { + goto block_8; + } + } + phi_a1 = phi_a1_2; + phi_a2 = phi_v0 * phi_a1_2; + phi_a3_2 = phi_v0 * (phi_a1_2 + 1); + phi_t0 = phi_v0 * (phi_a1_2 + 2); + phi_t1 = phi_v0 * (phi_a1_2 + 3); + + if ((temp_a1_2 != arg0)) + { + do + { + temp_v1_2 = (((phi_v1_3 + phi_a2) + phi_a3_2) + phi_t0) + phi_t1; + temp_a1_2 = phi_a1 + 4; + phi_a1 = temp_a1_2; + phi_v1_2 = temp_v1_2; + phi_v1_3 = temp_v1_2; + phi_a2 = phi_a2 + (phi_v0 * 4); + phi_a3_2 = phi_a3_2 + (phi_v0 * 4); + phi_t0 = phi_t0 + (phi_v0 * 4); + phi_t1 = phi_t1 + (phi_v0 * 4); + } while ((temp_a1_2 != arg0)) + } + } +block_8: + temp_v0 = phi_v0 + 1; + phi_v0 = temp_v0; + phi_v1 = phi_v1_2; + phi_v1_5 = phi_v1_2; + if (temp_v0 != arg0) + { + goto loop_1; + } + } + return phi_v1; +} diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index 1cb4f301..ac5435a2 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -1 +1,82 @@ -CRASHED +s32 test(u32 arg0, s32 arg1, s32 arg2) +{ + s32 temp_a2; + s32 temp_t0; + s32 temp_v0; + s32 temp_v0_2; + s32 temp_v0_3; + s32 temp_v1; + s32 temp_v1_2; + s32 phi_v0; + void *phi_a0; + s32 phi_v1; + s32 phi_v0_2; + void *phi_a0_2; + s32 phi_v1_2; + s32 phi_return; + s32 phi_a2; + s32 phi_a2_2; + + temp_v0 = ((0x80150000 + ((arg0 >> 0x18) * 4))->unk258 + (arg0 & 0xFFFFFF)) + 0x80000000; + phi_return = temp_v0; + if (arg1 == 0) + { + temp_t0 = arg1 & 3; + phi_v0_2 = temp_v0; + phi_a2 = arg2; + phi_v1_2 = 0; + if (temp_t0 != 0) + { + phi_v0 = temp_v0; + phi_a0 = (arg2 * 4) + &D_8015F668; + phi_v1 = 0; + phi_a2_2 = arg2; + + if ((temp_t0 != temp_v1)) + { + do + { + *phi_a0 = (s32) phi_v0; + temp_v1 = phi_v1 + 1; + temp_v0_2 = phi_v0 + 0x10; + temp_a2 = phi_a2_2 + 1; + phi_v0 = temp_v0_2; + phi_a0 = phi_a0 + 4; + phi_v1 = temp_v1; + phi_a2_2 = temp_a2; + } while ((temp_t0 != temp_v1)) + } + phi_return = temp_v0_2; + phi_v0_2 = temp_v0_2; + phi_a2 = temp_a2; + phi_v1_2 = temp_v1; + if (temp_v1 == arg1) + { + goto block_7; + } + } + phi_a0_2 = (phi_a2 * 4) + &D_8015F668; + + if ((temp_v1_2 != arg1)) + { + do + { + phi_a0_2->unk0 = (s32) phi_v0_2; + temp_v0_3 = phi_v0_2 + 0x10; + phi_a0_2->unk4 = temp_v0_3; + temp_v0_3 = temp_v0_3 + 0x10; + phi_a0_2->unk8 = temp_v0_3; + temp_v0_3 = temp_v0_3 + 0x10; + phi_a0_2->unkC = temp_v0_3; + temp_v1_2 = phi_v1_2 + 4; + temp_v0_3 = temp_v0_3 + 0x10; + phi_v0_2 = temp_v0_3; + phi_a0_2 = phi_a0_2 + 0x10; + phi_v1_2 = temp_v1_2; + phi_return = temp_v0_3; + } while ((temp_v1_2 != arg1)) + } + } +block_7: + return phi_return; +} diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index 6bff032b..42c6eb09 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -9,23 +9,22 @@ s32 test(s32 arg0) if ((temp_a0 == 5)) { do - { - D_410150 = (s32) phi_a0; - temp_a0 = phi_a0 + 1; - D_410150 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = temp_a0; - sp4 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = temp_a0; - D_410150 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410150 = sp4; - phi_a0 = temp_a0; + {D_410150 = (s32) phi_a0; +temp_a0 = phi_a0 + 1; +D_410150 = temp_a0; +temp_a0 = temp_a0 + 1; +D_410150 = temp_a0; +temp_a0 = temp_a0 + 1; +D_410150 = temp_a0; +sp4 = temp_a0; +temp_a0 = temp_a0 + 1; +D_410150 = temp_a0; +D_410150 = temp_a0; +temp_a0 = temp_a0 + 1; +D_410150 = temp_a0; +temp_a0 = temp_a0 + 1; +D_410150 = sp4; +phi_a0 = temp_a0; } while ((temp_a0 == 5)) } return sp4; From e35518b006c707b653e1998f30f623bb6f052ff4 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 16:07:17 -0700 Subject: [PATCH 07/54] Fix indentation of simple do-whiles --- src/if_statements.py | 11 +++---- tests/end_to_end/loop/irix-g-out.c | 7 ++-- tests/end_to_end/loop_nested/irix-g-out.c | 5 +-- .../end_to_end/multiple-assigns/irix-g-out.c | 33 ++++++++++--------- 4 files changed, 29 insertions(+), 27 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 0e16ec4f..55426b94 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -597,17 +597,16 @@ def pattern_match_against_simple_do_while_loop( if statement.should_write() ] - body_statements = [ - statement - for statement in node_1.block.block_info.to_write - if statement.should_write() - ] + do_while_body = Body(False, []) + emit_node(context, node_1, do_while_body, indent + 8) + assert isinstance(node_1.block.block_info, BlockInfo) + assert node_1.block.block_info.branch_condition return DoWhileLoop( indent, context.options.coding_style, node_2, - Body(False, body_statements), + do_while_body, initialization_statements, node_1.block.block_info.branch_condition, ) diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index 4ac30748..6390f2a8 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -8,9 +8,10 @@ void test(s32 arg0, s32 arg1) if (((temp_t9 < arg1) != 0)) { do - {*(arg0 + sp4) = (u8)0; -temp_t9 = sp4 + 1; -sp4 = temp_t9; + { + *(arg0 + sp4) = (u8)0; + temp_t9 = sp4 + 1; + sp4 = temp_t9; } while (((temp_t9 < arg1) != 0)) } } diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 5a299d9c..7e5386ee 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -14,8 +14,9 @@ s32 test(s32 arg0) if (((sp4 < arg0) != 0)) { do - {sp8 = sp8 + (spC * sp4); -sp4 = sp4 + 1; + { + sp8 = sp8 + (spC * sp4); + sp4 = sp4 + 1; } while (((sp4 < arg0) != 0)) } spC = spC + 1; diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index 42c6eb09..6bff032b 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -9,22 +9,23 @@ s32 test(s32 arg0) if ((temp_a0 == 5)) { do - {D_410150 = (s32) phi_a0; -temp_a0 = phi_a0 + 1; -D_410150 = temp_a0; -temp_a0 = temp_a0 + 1; -D_410150 = temp_a0; -temp_a0 = temp_a0 + 1; -D_410150 = temp_a0; -sp4 = temp_a0; -temp_a0 = temp_a0 + 1; -D_410150 = temp_a0; -D_410150 = temp_a0; -temp_a0 = temp_a0 + 1; -D_410150 = temp_a0; -temp_a0 = temp_a0 + 1; -D_410150 = sp4; -phi_a0 = temp_a0; + { + D_410150 = (s32) phi_a0; + temp_a0 = phi_a0 + 1; + D_410150 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = temp_a0; + sp4 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = temp_a0; + D_410150 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410150 = sp4; + phi_a0 = temp_a0; } while ((temp_a0 == 5)) } return sp4; From db72018d385e7458e09507a7589870be455d84d6 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 16:20:11 -0700 Subject: [PATCH 08/54] Negate loop-gating condition --- src/if_statements.py | 2 +- tests/end_to_end/loop/irix-o2-noandor-out.c | 2 +- tests/end_to_end/loop/irix-o2-out.c | 2 +- tests/end_to_end/loop_nested/irix-o2-out.c | 2 +- tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 55426b94..8bf7df2e 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -753,7 +753,7 @@ def pattern_match_against_unrolled_while_loop( ) should_loop = IfElseStatement( - start.block.block_info.branch_condition, + start.block.block_info.branch_condition.negated(), indent, context.options.coding_style, main_body, diff --git a/tests/end_to_end/loop/irix-o2-noandor-out.c b/tests/end_to_end/loop/irix-o2-noandor-out.c index 8133a260..ecbfc96d 100644 --- a/tests/end_to_end/loop/irix-o2-noandor-out.c +++ b/tests/end_to_end/loop/irix-o2-noandor-out.c @@ -12,7 +12,7 @@ s32 test(void *arg0, s32 arg1) s32 phi_v0_3; phi_return = 0; - if (arg1 <= 0) + if (arg1 > 0) { temp_a3 = arg1 & 3; phi_v0_3 = 0; diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index 8133a260..ecbfc96d 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -12,7 +12,7 @@ s32 test(void *arg0, s32 arg1) s32 phi_v0_3; phi_return = 0; - if (arg1 <= 0) + if (arg1 > 0) { temp_a3 = arg1 & 3; phi_v0_3 = 0; diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index e90b538a..77b651b1 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -28,7 +28,7 @@ s32 test(s32 arg0) { loop_1: phi_v1_2 = phi_v1_5; - if (arg0 <= 0) + if (arg0 > 0) { temp_t1 = arg0 & 3; phi_a1_2 = 0; diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index ac5435a2..0ec40b8b 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -19,7 +19,7 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) temp_v0 = ((0x80150000 + ((arg0 >> 0x18) * 4))->unk258 + (arg0 & 0xFFFFFF)) + 0x80000000; phi_return = temp_v0; - if (arg1 == 0) + if (arg1 != 0) { temp_t0 = arg1 & 3; phi_v0_2 = temp_v0; From b3b8e220e38dfffab20cda7127c46541ad0f7319 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 18:18:24 -0700 Subject: [PATCH 09/54] Divorce if-statement from do-while loop --- src/if_statements.py | 86 +++++++++++-------- tests/end_to_end/loop/irix-g-out.c | 3 +- tests/end_to_end/loop/irix-o2-noandor-out.c | 42 ++++----- tests/end_to_end/loop/irix-o2-out.c | 42 ++++----- tests/end_to_end/loop_nested/irix-g-out.c | 3 +- tests/end_to_end/loop_nested/irix-o2-out.c | 44 ++++------ tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 58 ++++++------- .../end_to_end/multiple-assigns/irix-g-out.c | 3 +- 8 files changed, 131 insertions(+), 150 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 8bf7df2e..a747adbb 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -113,7 +113,7 @@ class DoWhileLoop: end_node: Node = attr.ib() body: "Body" = attr.ib() - initialization: List[TranslateStatement] = attr.ib(factory=list) + initialization: List[TranslateStatement] = attr.ib(factory=list) # deprecated condition: Optional[Condition] = attr.ib(default=None) def should_write(self) -> bool: @@ -124,26 +124,14 @@ def __str__(self) -> str: space_2 = " " * (self.indent + 4) space_3 = " " * (self.indent + 8) brace_after_if = f"\n{space}{{" if self.coding_style.newline_after_if else " {" - brace_after_do = ( - f"\n{space_2}{{" if self.coding_style.newline_after_if else " {" - ) + brace_after_do = f"\n{space}{{" if self.coding_style.newline_after_if else " {" - init = "\n".join(str(stmt) for stmt in self.initialization) cond = str(self.condition).rstrip(";") if self.condition else "" body = f"\n".join(f"{stmt}" for stmt in self.body.statements) string_components = [ - f"{space}{init}", - f"{space}if ({cond}){brace_after_if}", - f"{space_2}do{brace_after_do}{body}", - f"{space_2}}} while ({cond})", - f"{space}}}", + f"{space}do{brace_after_do}{body}", + f"{space}}} while ({cond})", ] - # Remnant of for-loops, will eventually resurrect - # string_components = [ - # f"{space}for ({init}; {cond}; {after}){brace_after_if}", - # str(self.body), # has its own indentation - # f"{space}}}", - # ] return "\n".join(string_components) @@ -577,8 +565,11 @@ def add_return_statement( def pattern_match_against_simple_do_while_loop( - context: Context, start: ConditionalNode, indent: int -) -> Optional[DoWhileLoop]: + context: Context, + start: ConditionalNode, + indent: int + # ) -> Optional[DoWhileLoop]: +) -> Optional[Tuple[Node, IfElseStatement, Node]]: node_1 = start.fallthrough_edge node_2 = start.conditional_edge @@ -591,25 +582,48 @@ def pattern_match_against_simple_do_while_loop( ): return None - initialization_statements = [ - statement - for statement in start.block.block_info.to_write - if statement.should_write() - ] - - do_while_body = Body(False, []) - emit_node(context, node_1, do_while_body, indent + 8) - + # initialization_statements = [ + # statement + # for statement in start.block.block_info.to_write + # if statement.should_write() + # ] + + # do_while_body = Body(False, []) + # emit_node(context, node_1, do_while_body, indent + 8) + + # assert isinstance(node_1.block.block_info, BlockInfo) + # assert node_1.block.block_info.branch_condition + # return DoWhileLoop( + # indent, + # context.options.coding_style, + # node_2, + # do_while_body, + # initialization_statements, + # node_1.block.block_info.branch_condition, + # ) + assert isinstance(start.block.block_info, BlockInfo) assert isinstance(node_1.block.block_info, BlockInfo) + assert start.block.block_info.branch_condition assert node_1.block.block_info.branch_condition - return DoWhileLoop( - indent, + + loop_body = Body(False, []) + emit_node(context, node_1, loop_body, indent + 8) + + do_while = DoWhileLoop( + indent + 4, context.options.coding_style, node_2, - do_while_body, - initialization_statements, + loop_body, + [], node_1.block.block_info.branch_condition, ) + should_loop = IfElseStatement( + start.block.block_info.branch_condition.negated(), + indent, + context.options.coding_style, + Body(False, [do_while]), + ) + return (start, should_loop, node_2) def pattern_match_against_unrolled_while_loop( @@ -710,7 +724,7 @@ def pattern_match_against_unrolled_while_loop( first_loop_metabody = Body(False, []) emit_node(context, node_2, first_loop_metabody, indent + 8) first_loop_body = Body(False, []) - emit_node(context, node_3, first_loop_body, indent + 16) + emit_node(context, node_3, first_loop_body, indent + 12) first_loop_metabody.add_statement( DoWhileLoop( indent + 8, @@ -740,7 +754,7 @@ def pattern_match_against_unrolled_while_loop( main_body.add_statement(first_loop_if) emit_node(context, node_5, main_body, indent + 4) second_loop_body = Body(False, []) - emit_node(context, node_6, second_loop_body, indent + 12) + emit_node(context, node_6, second_loop_body, indent + 8) main_body.add_statement( DoWhileLoop( indent + 4, @@ -846,8 +860,10 @@ def build_flowgraph_between( context, curr_start, indent ) if do_while_loop: - body.add_do_while_loop(do_while_loop) - curr_start = do_while_loop.end_node + (_, loop_if_statement, curr_end) = do_while_loop + # emit_node(context, curr_start, body, indent) + body.add_if_else(loop_if_statement) + curr_start = curr_end continue # Same thing for giant unrolled loops: diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index 6390f2a8..9b8b0d4d 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -4,8 +4,7 @@ void test(s32 arg0, s32 arg1) s32 temp_t9; sp4 = 0; - sp4 = 0; - if (((temp_t9 < arg1) != 0)) + if (arg1 > 0) { do { diff --git a/tests/end_to_end/loop/irix-o2-noandor-out.c b/tests/end_to_end/loop/irix-o2-noandor-out.c index ecbfc96d..3d6abf9f 100644 --- a/tests/end_to_end/loop/irix-o2-noandor-out.c +++ b/tests/end_to_end/loop/irix-o2-noandor-out.c @@ -20,17 +20,13 @@ s32 test(void *arg0, s32 arg1) { phi_v1 = arg0; phi_v0 = 0; - - if ((temp_a3 != temp_v0)) + do { - do - { - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - } while ((temp_a3 != temp_v0)) - } + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + } while ((temp_a3 != temp_v0)) phi_return = temp_v0; phi_v0_3 = temp_v0; if (temp_v0 == arg1) @@ -40,22 +36,18 @@ s32 test(void *arg0, s32 arg1) } phi_v1_2 = arg0 + phi_v0_3; phi_v0_2 = phi_v0_3; - - if ((temp_v0_2 != arg1)) + do { - do - { - temp_v0_2 = phi_v0_2 + 4; - phi_v1_2->unk1 = (u8)0; - phi_v1_2->unk2 = (u8)0; - phi_v1_2->unk3 = (u8)0; - temp_v1 = phi_v1_2 + 4; - temp_v1->unk-4 = (u8)0; - phi_v1_2 = temp_v1; - phi_v0_2 = temp_v0_2; - phi_return = temp_v0_2; - } while ((temp_v0_2 != arg1)) - } + temp_v0_2 = phi_v0_2 + 4; + phi_v1_2->unk1 = (u8)0; + phi_v1_2->unk2 = (u8)0; + phi_v1_2->unk3 = (u8)0; + temp_v1 = phi_v1_2 + 4; + temp_v1->unk-4 = (u8)0; + phi_v1_2 = temp_v1; + phi_v0_2 = temp_v0_2; + phi_return = temp_v0_2; + } while ((temp_v0_2 != arg1)) } block_7: return phi_return; diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index ecbfc96d..3d6abf9f 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -20,17 +20,13 @@ s32 test(void *arg0, s32 arg1) { phi_v1 = arg0; phi_v0 = 0; - - if ((temp_a3 != temp_v0)) + do { - do - { - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - } while ((temp_a3 != temp_v0)) - } + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + } while ((temp_a3 != temp_v0)) phi_return = temp_v0; phi_v0_3 = temp_v0; if (temp_v0 == arg1) @@ -40,22 +36,18 @@ s32 test(void *arg0, s32 arg1) } phi_v1_2 = arg0 + phi_v0_3; phi_v0_2 = phi_v0_3; - - if ((temp_v0_2 != arg1)) + do { - do - { - temp_v0_2 = phi_v0_2 + 4; - phi_v1_2->unk1 = (u8)0; - phi_v1_2->unk2 = (u8)0; - phi_v1_2->unk3 = (u8)0; - temp_v1 = phi_v1_2 + 4; - temp_v1->unk-4 = (u8)0; - phi_v1_2 = temp_v1; - phi_v0_2 = temp_v0_2; - phi_return = temp_v0_2; - } while ((temp_v0_2 != arg1)) - } + temp_v0_2 = phi_v0_2 + 4; + phi_v1_2->unk1 = (u8)0; + phi_v1_2->unk2 = (u8)0; + phi_v1_2->unk3 = (u8)0; + temp_v1 = phi_v1_2 + 4; + temp_v1->unk-4 = (u8)0; + phi_v1_2 = temp_v1; + phi_v0_2 = temp_v0_2; + phi_return = temp_v0_2; + } while ((temp_v0_2 != arg1)) } block_7: return phi_return; diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 7e5386ee..89585e71 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -10,8 +10,7 @@ s32 test(s32 arg0) { loop_1: sp4 = 0; - sp4 = 0; - if (((sp4 < arg0) != 0)) + if (sp4 < arg0) { do { diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index 77b651b1..dff4787e 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -38,18 +38,14 @@ s32 test(s32 arg0) phi_a3 = 1; phi_v1_4 = phi_v1_5; phi_a2_2 = phi_v0 * 0; - - if ((temp_t1 != phi_a3)) + do { - do - { - temp_a1 = phi_a3; - temp_v1 = phi_v1_4 + phi_a2_2; - phi_a3 = phi_a3 + 1; - phi_v1_4 = temp_v1; - phi_a2_2 = phi_a2_2 + phi_v0; - } while ((temp_t1 != phi_a3)) - } + temp_a1 = phi_a3; + temp_v1 = phi_v1_4 + phi_a2_2; + phi_a3 = phi_a3 + 1; + phi_v1_4 = temp_v1; + phi_a2_2 = phi_a2_2 + phi_v0; + } while ((temp_t1 != phi_a3)) phi_a1_2 = temp_a1; phi_v1_2 = temp_v1; phi_v1_3 = temp_v1; @@ -63,22 +59,18 @@ s32 test(s32 arg0) phi_a3_2 = phi_v0 * (phi_a1_2 + 1); phi_t0 = phi_v0 * (phi_a1_2 + 2); phi_t1 = phi_v0 * (phi_a1_2 + 3); - - if ((temp_a1_2 != arg0)) + do { - do - { - temp_v1_2 = (((phi_v1_3 + phi_a2) + phi_a3_2) + phi_t0) + phi_t1; - temp_a1_2 = phi_a1 + 4; - phi_a1 = temp_a1_2; - phi_v1_2 = temp_v1_2; - phi_v1_3 = temp_v1_2; - phi_a2 = phi_a2 + (phi_v0 * 4); - phi_a3_2 = phi_a3_2 + (phi_v0 * 4); - phi_t0 = phi_t0 + (phi_v0 * 4); - phi_t1 = phi_t1 + (phi_v0 * 4); - } while ((temp_a1_2 != arg0)) - } + temp_v1_2 = (((phi_v1_3 + phi_a2) + phi_a3_2) + phi_t0) + phi_t1; + temp_a1_2 = phi_a1 + 4; + phi_a1 = temp_a1_2; + phi_v1_2 = temp_v1_2; + phi_v1_3 = temp_v1_2; + phi_a2 = phi_a2 + (phi_v0 * 4); + phi_a3_2 = phi_a3_2 + (phi_v0 * 4); + phi_t0 = phi_t0 + (phi_v0 * 4); + phi_t1 = phi_t1 + (phi_v0 * 4); + } while ((temp_a1_2 != arg0)) } block_8: temp_v0 = phi_v0 + 1; diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index 7575b3fd..ac3e740d 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -31,21 +31,17 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) phi_a0 = (arg2 * 4) + &D_8015F668; phi_v1 = 0; phi_a2_2 = arg2; - - if ((temp_t0 != temp_v1)) + do { - do - { - *phi_a0 = phi_v0; - temp_v1 = phi_v1 + 1; - temp_v0_2 = phi_v0 + 0x10; - temp_a2 = phi_a2_2 + 1; - phi_v0 = temp_v0_2; - phi_a0 = phi_a0 + 4; - phi_v1 = temp_v1; - phi_a2_2 = temp_a2; - } while ((temp_t0 != temp_v1)) - } + *phi_a0 = phi_v0; + temp_v1 = phi_v1 + 1; + temp_v0_2 = phi_v0 + 0x10; + temp_a2 = phi_a2_2 + 1; + phi_v0 = temp_v0_2; + phi_a0 = phi_a0 + 4; + phi_v1 = temp_v1; + phi_a2_2 = temp_a2; + } while ((temp_t0 != temp_v1)) phi_return = temp_v0_2; phi_v0_2 = temp_v0_2; phi_a2 = temp_a2; @@ -56,26 +52,22 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) } } phi_a0_2 = (phi_a2 * 4) + &D_8015F668; - - if ((temp_v1_2 != arg1)) + do { - do - { - phi_a0_2->unk0 = phi_v0_2; - temp_v0_3 = phi_v0_2 + 0x10; - phi_a0_2->unk4 = temp_v0_3; - temp_v0_3 = temp_v0_3 + 0x10; - phi_a0_2->unk8 = temp_v0_3; - temp_v0_3 = temp_v0_3 + 0x10; - phi_a0_2->unkC = temp_v0_3; - temp_v1_2 = phi_v1_2 + 4; - temp_v0_3 = temp_v0_3 + 0x10; - phi_v0_2 = temp_v0_3; - phi_a0_2 = phi_a0_2 + 0x10; - phi_v1_2 = temp_v1_2; - phi_return = temp_v0_3; - } while ((temp_v1_2 != arg1)) - } + phi_a0_2->unk0 = phi_v0_2; + temp_v0_3 = phi_v0_2 + 0x10; + phi_a0_2->unk4 = temp_v0_3; + temp_v0_3 = temp_v0_3 + 0x10; + phi_a0_2->unk8 = temp_v0_3; + temp_v0_3 = temp_v0_3 + 0x10; + phi_a0_2->unkC = temp_v0_3; + temp_v1_2 = phi_v1_2 + 4; + temp_v0_3 = temp_v0_3 + 0x10; + phi_v0_2 = temp_v0_3; + phi_a0_2 = phi_a0_2 + 0x10; + phi_v1_2 = temp_v1_2; + phi_return = temp_v0_3; + } while ((temp_v1_2 != arg1)) } block_7: return phi_return; diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index 462b845a..1a4e7c7b 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -5,8 +5,7 @@ s32 test(s32 arg0) s32 phi_a0; phi_a0 = arg0; - phi_a0 = arg0; - if ((temp_a0 == 5)) + if (arg0 == 5) { do { From 09e85cbc5c4f92aaa52063a354f4814c763a3cb3 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 18:19:01 -0700 Subject: [PATCH 10/54] Add missing semicolon after do-while --- src/if_statements.py | 2 +- tests/end_to_end/loop/irix-g-out.c | 2 +- tests/end_to_end/loop/irix-o2-noandor-out.c | 4 ++-- tests/end_to_end/loop/irix-o2-out.c | 4 ++-- tests/end_to_end/loop_nested/irix-g-out.c | 2 +- tests/end_to_end/loop_nested/irix-o2-out.c | 4 ++-- tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 4 ++-- tests/end_to_end/multiple-assigns/irix-g-out.c | 2 +- 8 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index a747adbb..ea2b7f3e 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -130,7 +130,7 @@ def __str__(self) -> str: body = f"\n".join(f"{stmt}" for stmt in self.body.statements) string_components = [ f"{space}do{brace_after_do}{body}", - f"{space}}} while ({cond})", + f"{space}}} while ({cond});", ] return "\n".join(string_components) diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index 9b8b0d4d..b0fc2265 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -11,6 +11,6 @@ void test(s32 arg0, s32 arg1) *(arg0 + sp4) = (u8)0; temp_t9 = sp4 + 1; sp4 = temp_t9; - } while (((temp_t9 < arg1) != 0)) + } while (((temp_t9 < arg1) != 0)); } } diff --git a/tests/end_to_end/loop/irix-o2-noandor-out.c b/tests/end_to_end/loop/irix-o2-noandor-out.c index 3d6abf9f..7b8dd9da 100644 --- a/tests/end_to_end/loop/irix-o2-noandor-out.c +++ b/tests/end_to_end/loop/irix-o2-noandor-out.c @@ -26,7 +26,7 @@ s32 test(void *arg0, s32 arg1) *phi_v1 = (u8)0; phi_v1 = phi_v1 + 1; phi_v0 = temp_v0; - } while ((temp_a3 != temp_v0)) + } while ((temp_a3 != temp_v0)); phi_return = temp_v0; phi_v0_3 = temp_v0; if (temp_v0 == arg1) @@ -47,7 +47,7 @@ s32 test(void *arg0, s32 arg1) phi_v1_2 = temp_v1; phi_v0_2 = temp_v0_2; phi_return = temp_v0_2; - } while ((temp_v0_2 != arg1)) + } while ((temp_v0_2 != arg1)); } block_7: return phi_return; diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index 3d6abf9f..7b8dd9da 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -26,7 +26,7 @@ s32 test(void *arg0, s32 arg1) *phi_v1 = (u8)0; phi_v1 = phi_v1 + 1; phi_v0 = temp_v0; - } while ((temp_a3 != temp_v0)) + } while ((temp_a3 != temp_v0)); phi_return = temp_v0; phi_v0_3 = temp_v0; if (temp_v0 == arg1) @@ -47,7 +47,7 @@ s32 test(void *arg0, s32 arg1) phi_v1_2 = temp_v1; phi_v0_2 = temp_v0_2; phi_return = temp_v0_2; - } while ((temp_v0_2 != arg1)) + } while ((temp_v0_2 != arg1)); } block_7: return phi_return; diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 89585e71..b998c837 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -16,7 +16,7 @@ s32 test(s32 arg0) { sp8 = sp8 + (spC * sp4); sp4 = sp4 + 1; - } while (((sp4 < arg0) != 0)) + } while (((sp4 < arg0) != 0)); } spC = spC + 1; if (spC < arg0) diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index dff4787e..b513cf9c 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -45,7 +45,7 @@ s32 test(s32 arg0) phi_a3 = phi_a3 + 1; phi_v1_4 = temp_v1; phi_a2_2 = phi_a2_2 + phi_v0; - } while ((temp_t1 != phi_a3)) + } while ((temp_t1 != phi_a3)); phi_a1_2 = temp_a1; phi_v1_2 = temp_v1; phi_v1_3 = temp_v1; @@ -70,7 +70,7 @@ s32 test(s32 arg0) phi_a3_2 = phi_a3_2 + (phi_v0 * 4); phi_t0 = phi_t0 + (phi_v0 * 4); phi_t1 = phi_t1 + (phi_v0 * 4); - } while ((temp_a1_2 != arg0)) + } while ((temp_a1_2 != arg0)); } block_8: temp_v0 = phi_v0 + 1; diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index ac3e740d..b27113a0 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -41,7 +41,7 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) phi_a0 = phi_a0 + 4; phi_v1 = temp_v1; phi_a2_2 = temp_a2; - } while ((temp_t0 != temp_v1)) + } while ((temp_t0 != temp_v1)); phi_return = temp_v0_2; phi_v0_2 = temp_v0_2; phi_a2 = temp_a2; @@ -67,7 +67,7 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) phi_a0_2 = phi_a0_2 + 0x10; phi_v1_2 = temp_v1_2; phi_return = temp_v0_3; - } while ((temp_v1_2 != arg1)) + } while ((temp_v1_2 != arg1)); } block_7: return phi_return; diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index 1a4e7c7b..b60561cd 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -25,7 +25,7 @@ s32 test(s32 arg0) temp_a0 = temp_a0 + 1; D_410150 = sp4; phi_a0 = temp_a0; - } while ((temp_a0 == 5)) + } while ((temp_a0 == 5)); } return sp4; } From 0a1156bd2287773d25311f12ec935f41ac380b47 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 19:21:25 -0700 Subject: [PATCH 11/54] Delete a bunch of dead code --- src/if_statements.py | 45 +------------------------------------------- 1 file changed, 1 insertion(+), 44 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index ea2b7f3e..534269d2 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -110,10 +110,7 @@ class DoWhileLoop: indent: int = attr.ib() coding_style: CodingStyle = attr.ib() - end_node: Node = attr.ib() body: "Body" = attr.ib() - - initialization: List[TranslateStatement] = attr.ib(factory=list) # deprecated condition: Optional[Condition] = attr.ib(default=None) def should_write(self) -> bool: @@ -135,18 +132,6 @@ def __str__(self) -> str: return "\n".join(string_components) -@attr.s -class UnrolledLoop: - indent: int = attr.ib() - coding_style: CodingStyle = attr.ib() - - end_node: Node = attr.ib() - body: "Body" = attr.ib() - - initialization: List[TranslateStatement] = attr.ib(factory=list) - condition: Optional[Condition] = attr.ib(default=None) - - Statement = Union[SimpleStatement, IfElseStatement, LabelStatement, DoWhileLoop] @@ -565,10 +550,7 @@ def add_return_statement( def pattern_match_against_simple_do_while_loop( - context: Context, - start: ConditionalNode, - indent: int - # ) -> Optional[DoWhileLoop]: + context: Context, start: ConditionalNode, indent: int ) -> Optional[Tuple[Node, IfElseStatement, Node]]: node_1 = start.fallthrough_edge node_2 = start.conditional_edge @@ -582,25 +564,6 @@ def pattern_match_against_simple_do_while_loop( ): return None - # initialization_statements = [ - # statement - # for statement in start.block.block_info.to_write - # if statement.should_write() - # ] - - # do_while_body = Body(False, []) - # emit_node(context, node_1, do_while_body, indent + 8) - - # assert isinstance(node_1.block.block_info, BlockInfo) - # assert node_1.block.block_info.branch_condition - # return DoWhileLoop( - # indent, - # context.options.coding_style, - # node_2, - # do_while_body, - # initialization_statements, - # node_1.block.block_info.branch_condition, - # ) assert isinstance(start.block.block_info, BlockInfo) assert isinstance(node_1.block.block_info, BlockInfo) assert start.block.block_info.branch_condition @@ -612,9 +575,7 @@ def pattern_match_against_simple_do_while_loop( do_while = DoWhileLoop( indent + 4, context.options.coding_style, - node_2, loop_body, - [], node_1.block.block_info.branch_condition, ) should_loop = IfElseStatement( @@ -729,9 +690,7 @@ def pattern_match_against_unrolled_while_loop( DoWhileLoop( indent + 8, context.options.coding_style, - node_4, first_loop_body, - [], node_3.block.block_info.branch_condition, ) ) @@ -759,9 +718,7 @@ def pattern_match_against_unrolled_while_loop( DoWhileLoop( indent + 4, context.options.coding_style, - node_7, second_loop_body, - [], node_6.block.block_info.branch_condition, ), ) From 03a8ab03c55289d98d2146220a2437e9f350a8de Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 21:38:19 -0700 Subject: [PATCH 12/54] Introduce crazy loop-rerolling with an option to disable --- src/if_statements.py | 177 +++++++++++++----- src/main.py | 7 + src/options.py | 1 + tests/end_to_end/loop/irix-o2-noandor-out.c | 46 +---- tests/end_to_end/loop/irix-o2-out.c | 46 +---- tests/end_to_end/loop_nested/irix-o2-out.c | 56 ++---- tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 67 ++----- 7 files changed, 185 insertions(+), 215 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 534269d2..08eb68c1 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -22,7 +22,13 @@ FunctionInfo, ) from .translate import Statement as TranslateStatement -from .translate import Type, simplify_condition, stringify_expr +from .translate import ( + Type, + simplify_condition, + stringify_expr, + EvalOnceStmt, + EvalOnceExpr, +) @attr.s @@ -589,7 +595,79 @@ def pattern_match_against_simple_do_while_loop( def pattern_match_against_unrolled_while_loop( context: Context, start: ConditionalNode, indent: int -) -> Optional[Tuple[Node, IfElseStatement, Node]]: +) -> Optional[ + Tuple[Node, Union[IfElseStatement, Tuple[Node, Node, DoWhileLoop]], Node] +]: + """ + A common-case for-loop is: + + ```c + for (i = 0; i < length; i++) { + // code + } + ``` + + which, at least for IRIX -O2, is compiled roughly like: + + ```c + for (i = 0; i < (length % 4); i++) { + // code in node 3 + } + for (i = (length % 4); i < length; i += 4) { + // code, repeated for i through i + 3 + // aka code in node 6 + } + ``` + + which is *actually* compiled exactly like this: + + ```c + [node_0] + if ([node_0.condition]) { + [node_1] + if (![node_1.condition]) { + [node_2] + while ([node_3.condition]) { + [node_3] + } + [node_4] + if ([node_4.condition]) { + goto label_7 + } + } + [node_5] + while ([node_6.condition]) { + [node_6] + } + } + label_7: + [node_7] + ``` + + This function aims to detect such loops, then emit them in a useful way. + + If `options.loop_rerolling` is disabled, the bottom, more literal + interpretation, is emitted. + + Otherwise, the following is emitted: + ```c + [node_0] + [node_1_MODIFIED] + [node_2] + while ([node_3.condition]) { + [node_3] + } + [node_7] + ``` + where the MODIFIED suffix indicates that we will not be using the + above-mentioned `length % 4`, but instead just `length`. Note that + this interpretation discards all of nodes 4, 5, and 6, as well as + several needless short-circuits. + + As you can see, this drastically modified output may be incorrect, + and can be disabled using the --no-reroll flag. + """ + node_1 = start.fallthrough_edge node_7 = start.conditional_edge @@ -629,45 +707,6 @@ def pattern_match_against_unrolled_while_loop( ): return None - # for (i = 0; i < length; i++) { - # // code - # } - # - # becomes - # - # for (i = 0; i < (length % 4); i++) { - # // code in node 3 - # } - # for (i = (length % 4); i < length; i += 4) { - # // code, repeated for i through i + 3 - # // aka code in node 6 - # } - # - # which, much futzing later, becomes - # - # [node_0] - # if ([node_0.condition]) - # { - # [node_1] - # if (![node_1.condition]) - # { - # [node_2] - # while ([node_3.condition]) { - # [node_3] - # } - # [node_4] - # if ([node_4.condition]) { - # goto label_7 - # } - # } - # [node_5] - # while ([node_6.condition]) { - # [node_6] - # } - # } - # label_7: - # [node_7] - assert isinstance(start.block.block_info, BlockInfo) assert isinstance(node_1.block.block_info, BlockInfo) assert isinstance(node_3.block.block_info, BlockInfo) @@ -686,14 +725,13 @@ def pattern_match_against_unrolled_while_loop( emit_node(context, node_2, first_loop_metabody, indent + 8) first_loop_body = Body(False, []) emit_node(context, node_3, first_loop_body, indent + 12) - first_loop_metabody.add_statement( - DoWhileLoop( - indent + 8, - context.options.coding_style, - first_loop_body, - node_3.block.block_info.branch_condition, - ) + first_loop_while = DoWhileLoop( + indent + 8, + context.options.coding_style, + first_loop_body, + node_3.block.block_info.branch_condition, ) + first_loop_metabody.add_statement(first_loop_while) emit_node(context, node_4, first_loop_metabody, indent + 8) first_loop_metabody.add_statement( IfElseStatement( @@ -729,7 +767,38 @@ def pattern_match_against_unrolled_while_loop( context.options.coding_style, main_body, ) - return (start, should_loop, node_7) + if not context.options.loop_rerolling: + return (start, should_loop, node_7) + + # [node_0] + # [node_1_MODIFIED] + # [node_2] + # while ([node_3.condition]) { + # [node_3] + # } + # [node_7] + to_write = node_1.block.block_info.to_write + original_remainder_taker = to_write[0] + assert isinstance(original_remainder_taker, EvalOnceStmt) + original_expr = original_remainder_taker.expr + assert isinstance(original_expr, EvalOnceExpr) + original_binop = original_expr.wrapped_expr + assert isinstance(original_binop, BinaryOp) + # !!! This is the only line that actually does anything !!! + # This is what replaces the "& 3" with nothing. + new_expr = attr.evolve(original_expr, wrapped_expr=original_binop.left) + no_taking_remainder = attr.evolve(original_remainder_taker, expr=new_expr) + new_to_write = [no_taking_remainder, *to_write[1:]] + new_block_info = attr.evolve(node_1.block.block_info, to_write=new_to_write) + new_block = attr.evolve(node_1.block) + new_block.block_info = new_block_info + node_1_modified = attr.evolve(node_1, block=new_block) + dedented_while_body = Body(False, []) + emit_node(context, node_3, dedented_while_body, indent + 4) + dedented_while = attr.evolve( + first_loop_while, indent=indent, body=dedented_while_body + ) + return (start, (node_1_modified, node_2, dedented_while), node_7) def build_flowgraph_between( @@ -828,8 +897,14 @@ def build_flowgraph_between( context, curr_start, indent ) if unrolled_loop: - (_, loop_if_statement, curr_end) = unrolled_loop - body.add_if_else(loop_if_statement) + (_, statement, curr_end) = unrolled_loop + if isinstance(statement, IfElseStatement): + body.add_if_else(statement) + else: + (node1, node2, do_while) = statement + emit_node(context, node1, body, indent) + emit_node(context, node2, body, indent) + body.add_do_while_loop(do_while) curr_start = curr_end continue diff --git a/src/main.py b/src/main.py index be616220..6f65799a 100644 --- a/src/main.py +++ b/src/main.py @@ -117,6 +117,12 @@ def parse_flags(flags: List[str]) -> Options: help="disable detection of &&/||", action="store_false", ) + parser.add_argument( + "--no-reroll", + dest="loop_rerolling", + help="disable emitting for-loops by un-unrolling (rerolling) while-loops", + action="store_false", + ) parser.add_argument( "--goto", metavar="PATTERN", @@ -203,6 +209,7 @@ def parse_flags(flags: List[str]) -> Options: void=args.void, ifs=args.ifs, andor_detection=args.andor_detection, + loop_rerolling=args.loop_rerolling, goto_patterns=args.goto_patterns, rodata_files=args.rodata_files, stop_on_error=args.stop_on_error, diff --git a/src/options.py b/src/options.py index 245c24b4..83dfaae2 100644 --- a/src/options.py +++ b/src/options.py @@ -18,6 +18,7 @@ class Options: void: bool = attr.ib() ifs: bool = attr.ib() andor_detection: bool = attr.ib() + loop_rerolling: bool = attr.ib() goto_patterns: List[str] = attr.ib() rodata_files: List[str] = attr.ib() stop_on_error: bool = attr.ib() diff --git a/tests/end_to_end/loop/irix-o2-noandor-out.c b/tests/end_to_end/loop/irix-o2-noandor-out.c index 7b8dd9da..4076292f 100644 --- a/tests/end_to_end/loop/irix-o2-noandor-out.c +++ b/tests/end_to_end/loop/irix-o2-noandor-out.c @@ -12,43 +12,17 @@ s32 test(void *arg0, s32 arg1) s32 phi_v0_3; phi_return = 0; - if (arg1 > 0) + temp_a3 = arg1; + phi_v0_3 = 0; + phi_v1 = arg0; + phi_v0 = 0; + do { - temp_a3 = arg1 & 3; - phi_v0_3 = 0; - if (temp_a3 != 0) - { - phi_v1 = arg0; - phi_v0 = 0; - do - { - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - } while ((temp_a3 != temp_v0)); - phi_return = temp_v0; - phi_v0_3 = temp_v0; - if (temp_v0 == arg1) - { - goto block_7; - } - } - phi_v1_2 = arg0 + phi_v0_3; - phi_v0_2 = phi_v0_3; - do - { - temp_v0_2 = phi_v0_2 + 4; - phi_v1_2->unk1 = (u8)0; - phi_v1_2->unk2 = (u8)0; - phi_v1_2->unk3 = (u8)0; - temp_v1 = phi_v1_2 + 4; - temp_v1->unk-4 = (u8)0; - phi_v1_2 = temp_v1; - phi_v0_2 = temp_v0_2; - phi_return = temp_v0_2; - } while ((temp_v0_2 != arg1)); - } + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + } while ((temp_a3 != temp_v0)); block_7: return phi_return; } diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index 7b8dd9da..4076292f 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -12,43 +12,17 @@ s32 test(void *arg0, s32 arg1) s32 phi_v0_3; phi_return = 0; - if (arg1 > 0) + temp_a3 = arg1; + phi_v0_3 = 0; + phi_v1 = arg0; + phi_v0 = 0; + do { - temp_a3 = arg1 & 3; - phi_v0_3 = 0; - if (temp_a3 != 0) - { - phi_v1 = arg0; - phi_v0 = 0; - do - { - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - } while ((temp_a3 != temp_v0)); - phi_return = temp_v0; - phi_v0_3 = temp_v0; - if (temp_v0 == arg1) - { - goto block_7; - } - } - phi_v1_2 = arg0 + phi_v0_3; - phi_v0_2 = phi_v0_3; - do - { - temp_v0_2 = phi_v0_2 + 4; - phi_v1_2->unk1 = (u8)0; - phi_v1_2->unk2 = (u8)0; - phi_v1_2->unk3 = (u8)0; - temp_v1 = phi_v1_2 + 4; - temp_v1->unk-4 = (u8)0; - phi_v1_2 = temp_v1; - phi_v0_2 = temp_v0_2; - phi_return = temp_v0_2; - } while ((temp_v0_2 != arg1)); - } + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + } while ((temp_a3 != temp_v0)); block_7: return phi_return; } diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index b513cf9c..de5e6ef7 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -28,50 +28,20 @@ s32 test(s32 arg0) { loop_1: phi_v1_2 = phi_v1_5; - if (arg0 > 0) + temp_t1 = arg0; + phi_a1_2 = 0; + phi_v1_3 = phi_v1_5; + phi_a3 = 1; + phi_v1_4 = phi_v1_5; + phi_a2_2 = phi_v0 * 0; + do { - temp_t1 = arg0 & 3; - phi_a1_2 = 0; - phi_v1_3 = phi_v1_5; - if (temp_t1 != 0) - { - phi_a3 = 1; - phi_v1_4 = phi_v1_5; - phi_a2_2 = phi_v0 * 0; - do - { - temp_a1 = phi_a3; - temp_v1 = phi_v1_4 + phi_a2_2; - phi_a3 = phi_a3 + 1; - phi_v1_4 = temp_v1; - phi_a2_2 = phi_a2_2 + phi_v0; - } while ((temp_t1 != phi_a3)); - phi_a1_2 = temp_a1; - phi_v1_2 = temp_v1; - phi_v1_3 = temp_v1; - if (temp_a1 == arg0) - { - goto block_8; - } - } - phi_a1 = phi_a1_2; - phi_a2 = phi_v0 * phi_a1_2; - phi_a3_2 = phi_v0 * (phi_a1_2 + 1); - phi_t0 = phi_v0 * (phi_a1_2 + 2); - phi_t1 = phi_v0 * (phi_a1_2 + 3); - do - { - temp_v1_2 = (((phi_v1_3 + phi_a2) + phi_a3_2) + phi_t0) + phi_t1; - temp_a1_2 = phi_a1 + 4; - phi_a1 = temp_a1_2; - phi_v1_2 = temp_v1_2; - phi_v1_3 = temp_v1_2; - phi_a2 = phi_a2 + (phi_v0 * 4); - phi_a3_2 = phi_a3_2 + (phi_v0 * 4); - phi_t0 = phi_t0 + (phi_v0 * 4); - phi_t1 = phi_t1 + (phi_v0 * 4); - } while ((temp_a1_2 != arg0)); - } + temp_a1 = phi_a3; + temp_v1 = phi_v1_4 + phi_a2_2; + phi_a3 = phi_a3 + 1; + phi_v1_4 = temp_v1; + phi_a2_2 = phi_a2_2 + phi_v0; + } while ((temp_t1 != phi_a3)); block_8: temp_v0 = phi_v0 + 1; phi_v0 = temp_v0; diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index b27113a0..ea489722 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -19,56 +19,25 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) temp_v0 = ((0x80150000 + ((arg0 >> 0x18) * 4))->unk258 + (arg0 & 0xFFFFFF)) + 0x80000000; phi_return = temp_v0; - if (arg1 != 0) + temp_t0 = arg1; + phi_v0_2 = temp_v0; + phi_a2 = arg2; + phi_v1_2 = 0; + phi_v0 = temp_v0; + phi_a0 = (arg2 * 4) + &D_8015F668; + phi_v1 = 0; + phi_a2_2 = arg2; + do { - temp_t0 = arg1 & 3; - phi_v0_2 = temp_v0; - phi_a2 = arg2; - phi_v1_2 = 0; - if (temp_t0 != 0) - { - phi_v0 = temp_v0; - phi_a0 = (arg2 * 4) + &D_8015F668; - phi_v1 = 0; - phi_a2_2 = arg2; - do - { - *phi_a0 = phi_v0; - temp_v1 = phi_v1 + 1; - temp_v0_2 = phi_v0 + 0x10; - temp_a2 = phi_a2_2 + 1; - phi_v0 = temp_v0_2; - phi_a0 = phi_a0 + 4; - phi_v1 = temp_v1; - phi_a2_2 = temp_a2; - } while ((temp_t0 != temp_v1)); - phi_return = temp_v0_2; - phi_v0_2 = temp_v0_2; - phi_a2 = temp_a2; - phi_v1_2 = temp_v1; - if (temp_v1 == arg1) - { - goto block_7; - } - } - phi_a0_2 = (phi_a2 * 4) + &D_8015F668; - do - { - phi_a0_2->unk0 = phi_v0_2; - temp_v0_3 = phi_v0_2 + 0x10; - phi_a0_2->unk4 = temp_v0_3; - temp_v0_3 = temp_v0_3 + 0x10; - phi_a0_2->unk8 = temp_v0_3; - temp_v0_3 = temp_v0_3 + 0x10; - phi_a0_2->unkC = temp_v0_3; - temp_v1_2 = phi_v1_2 + 4; - temp_v0_3 = temp_v0_3 + 0x10; - phi_v0_2 = temp_v0_3; - phi_a0_2 = phi_a0_2 + 0x10; - phi_v1_2 = temp_v1_2; - phi_return = temp_v0_3; - } while ((temp_v1_2 != arg1)); - } + *phi_a0 = phi_v0; + temp_v1 = phi_v1 + 1; + temp_v0_2 = phi_v0 + 0x10; + temp_a2 = phi_a2_2 + 1; + phi_v0 = temp_v0_2; + phi_a0 = phi_a0 + 4; + phi_v1 = temp_v1; + phi_a2_2 = temp_a2; + } while ((temp_t0 != temp_v1)); block_7: return phi_return; } From 0b22145da1d970272a875c3d0a2ca73abbfe99f7 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sun, 26 Apr 2020 21:49:00 -0700 Subject: [PATCH 13/54] Replace --no-andor with --no-reroll in loop test --- .../loop/irix-o2-no-reroll-flags.txt | 1 + tests/end_to_end/loop/irix-o2-no-reroll-out.c | 54 +++++++++++++++++++ ...{irix-o2-noandor.s => irix-o2-no-reroll.s} | 0 .../end_to_end/loop/irix-o2-noandor-flags.txt | 1 - tests/end_to_end/loop/irix-o2-noandor-out.c | 28 ---------- 5 files changed, 55 insertions(+), 29 deletions(-) create mode 100644 tests/end_to_end/loop/irix-o2-no-reroll-flags.txt create mode 100644 tests/end_to_end/loop/irix-o2-no-reroll-out.c rename tests/end_to_end/loop/{irix-o2-noandor.s => irix-o2-no-reroll.s} (100%) delete mode 100644 tests/end_to_end/loop/irix-o2-noandor-flags.txt delete mode 100644 tests/end_to_end/loop/irix-o2-noandor-out.c diff --git a/tests/end_to_end/loop/irix-o2-no-reroll-flags.txt b/tests/end_to_end/loop/irix-o2-no-reroll-flags.txt new file mode 100644 index 00000000..a0847c20 --- /dev/null +++ b/tests/end_to_end/loop/irix-o2-no-reroll-flags.txt @@ -0,0 +1 @@ +--no-reroll diff --git a/tests/end_to_end/loop/irix-o2-no-reroll-out.c b/tests/end_to_end/loop/irix-o2-no-reroll-out.c new file mode 100644 index 00000000..7b8dd9da --- /dev/null +++ b/tests/end_to_end/loop/irix-o2-no-reroll-out.c @@ -0,0 +1,54 @@ +s32 test(void *arg0, s32 arg1) +{ + s32 temp_a3; + s32 temp_v0; + s32 temp_v0_2; + void *temp_v1; + void *phi_v1; + s32 phi_v0; + void *phi_v1_2; + s32 phi_v0_2; + s32 phi_return; + s32 phi_v0_3; + + phi_return = 0; + if (arg1 > 0) + { + temp_a3 = arg1 & 3; + phi_v0_3 = 0; + if (temp_a3 != 0) + { + phi_v1 = arg0; + phi_v0 = 0; + do + { + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + } while ((temp_a3 != temp_v0)); + phi_return = temp_v0; + phi_v0_3 = temp_v0; + if (temp_v0 == arg1) + { + goto block_7; + } + } + phi_v1_2 = arg0 + phi_v0_3; + phi_v0_2 = phi_v0_3; + do + { + temp_v0_2 = phi_v0_2 + 4; + phi_v1_2->unk1 = (u8)0; + phi_v1_2->unk2 = (u8)0; + phi_v1_2->unk3 = (u8)0; + temp_v1 = phi_v1_2 + 4; + temp_v1->unk-4 = (u8)0; + phi_v1_2 = temp_v1; + phi_v0_2 = temp_v0_2; + phi_return = temp_v0_2; + } while ((temp_v0_2 != arg1)); + } +block_7: + return phi_return; +} diff --git a/tests/end_to_end/loop/irix-o2-noandor.s b/tests/end_to_end/loop/irix-o2-no-reroll.s similarity index 100% rename from tests/end_to_end/loop/irix-o2-noandor.s rename to tests/end_to_end/loop/irix-o2-no-reroll.s diff --git a/tests/end_to_end/loop/irix-o2-noandor-flags.txt b/tests/end_to_end/loop/irix-o2-noandor-flags.txt deleted file mode 100644 index 751e7042..00000000 --- a/tests/end_to_end/loop/irix-o2-noandor-flags.txt +++ /dev/null @@ -1 +0,0 @@ ---no-andor diff --git a/tests/end_to_end/loop/irix-o2-noandor-out.c b/tests/end_to_end/loop/irix-o2-noandor-out.c deleted file mode 100644 index 4076292f..00000000 --- a/tests/end_to_end/loop/irix-o2-noandor-out.c +++ /dev/null @@ -1,28 +0,0 @@ -s32 test(void *arg0, s32 arg1) -{ - s32 temp_a3; - s32 temp_v0; - s32 temp_v0_2; - void *temp_v1; - void *phi_v1; - s32 phi_v0; - void *phi_v1_2; - s32 phi_v0_2; - s32 phi_return; - s32 phi_v0_3; - - phi_return = 0; - temp_a3 = arg1; - phi_v0_3 = 0; - phi_v1 = arg0; - phi_v0 = 0; - do - { - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - } while ((temp_a3 != temp_v0)); -block_7: - return phi_return; -} From c937919383876e160d9fd8f5a2e9dcbada06c35b Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Tue, 28 Apr 2020 19:12:12 -0700 Subject: [PATCH 14/54] Got loop.c working, but broke nested --- src/flow_graph.py | 10 ++++ src/flow_graph_munge.py | 126 ++++++++++++++++++++++++++++++++++++++++ src/main.py | 16 +++-- src/translate.py | 7 ++- 4 files changed, 152 insertions(+), 7 deletions(-) create mode 100644 src/flow_graph_munge.py diff --git a/src/flow_graph.py b/src/flow_graph.py index c3671191..e422695e 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -521,6 +521,14 @@ class BaseNode: immediate_dominator: Optional["Node"] = attr.ib(init=False, default=None) immediately_dominates: List["Node"] = attr.ib(init=False, factory=list) + def to_basic_node(self, successor: "Node") -> "BasicNode": + new_node = BasicNode(self.block, self.emit_goto, successor) + new_node.parents = self.parents + new_node.dominators = self.dominators + new_node.immediate_dominator = self.immediate_dominator + new_node.immediately_dominates = self.immediately_dominates + return new_node + def add_parent(self, parent: "Node") -> None: self.parents.append(parent) @@ -550,6 +558,8 @@ class ConditionalNode(BaseNode): conditional_edge: "Node" = attr.ib() fallthrough_edge: "Node" = attr.ib() + marked_to_remove_remainder_op: bool = attr.ib(default=False) + def is_loop(self) -> bool: return is_loop_edge(self, self.conditional_edge) diff --git a/src/flow_graph_munge.py b/src/flow_graph_munge.py new file mode 100644 index 00000000..1863bafc --- /dev/null +++ b/src/flow_graph_munge.py @@ -0,0 +1,126 @@ +import typing +from copy import copy +from typing import List, Optional, Tuple + +import attr + +from .flow_graph import BasicNode, ConditionalNode, FlowGraph, Node, compute_dominators +from .parse_instruction import Instruction + + +def unroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> Optional[FlowGraph]: + node_1 = start.fallthrough_edge + node_7 = start.conditional_edge + + if not isinstance(node_1, ConditionalNode): + return None + node_2 = node_1.fallthrough_edge + node_5 = node_1.conditional_edge + + if not isinstance(node_2, BasicNode): + return None + node_3 = node_2.successor + + if not ( + isinstance(node_3, ConditionalNode) + and node_3.is_loop() + and node_3.conditional_edge.block.index == node_3.block.index + ): + return None + node_4 = node_3.fallthrough_edge + + if not ( + isinstance(node_4, ConditionalNode) + and node_4.fallthrough_edge.block.index == node_5.block.index + and node_4.conditional_edge.block.index == node_7.block.index + ): + return None + + if not isinstance(node_5, BasicNode): + return None + node_6 = node_5.successor + + if not ( + isinstance(node_6, ConditionalNode) + and node_6.is_loop() + and node_6.conditional_edge.block.index == node_6.block.index + and node_6.fallthrough_edge.block.index == node_7.block.index + ): + return None + + modified_node_3 = attr.evolve( + node_3, fallthrough_edge=node_7, marked_to_remove_remainder_op=True, + ) + modified_node_3.conditional_edge = modified_node_3 + modified_node_2 = attr.evolve(node_2, successor=modified_node_3) + modified_node_3.parents = [modified_node_2, modified_node_3] + node_7.parents = [modified_node_3] + + new_instructions_1 = copy(node_1.block.instructions) + branches = list( + filter(lambda instr: instr.is_branch_instruction(), new_instructions_1) + ) + assert len(branches) == 1 + del new_instructions_1[new_instructions_1.index(branches[0])] + # TODO: also remove & 3 here + andis = list(filter(lambda instr: instr.mnemonic == "andi", new_instructions_1)) + assert len(andis) == 1 + new_instructions_1[new_instructions_1.index(andis[0])] = Instruction( + mnemonic="move", args=[andis[0].args[0], andis[0].args[1]] + ) + + new_block_1 = attr.evolve(node_1.block, instructions=new_instructions_1) + modified_node_1 = attr.evolve( + node_1.to_basic_node(successor=modified_node_2), block=new_block_1 + ) + modified_node_2.parents = [modified_node_1] + + new_instructions_0 = copy(start.block.instructions) + branches = list( + filter(lambda instr: instr.is_branch_instruction(), new_instructions_0) + ) + assert len(branches) == 1 + del new_instructions_0[new_instructions_0.index(branches[0])] + new_block_0 = attr.evolve(start.block, instructions=new_instructions_0) + modified_node_0 = attr.evolve( + start.to_basic_node(successor=modified_node_1), block=new_block_0 + ) + modified_node_1.parents = [modified_node_0] + + # TODO: does copy() work? + new_nodes = copy(flow_graph.nodes) + # TODO: do we need to reinterpret .parents? + new_nodes[new_nodes.index(node_3)] = modified_node_3 + new_nodes[new_nodes.index(node_2)] = modified_node_2 + new_nodes[new_nodes.index(node_1)] = modified_node_1 + new_nodes[new_nodes.index(start)] = modified_node_0 + + del new_nodes[new_nodes.index(node_4)] + del new_nodes[new_nodes.index(node_5)] + del new_nodes[new_nodes.index(node_6)] + + compute_dominators(new_nodes) + return attr.evolve(flow_graph, nodes=new_nodes) + + +def munge_unrolled_loops(flow_graph: FlowGraph) -> FlowGraph: + # TODO: This is horrible, probably not what I want. + # What if knocking out nodes 4, 5, 6 just reveals another + # set of nodes that look identical? We will incorrectly + # be merging two adjacent for-loops. + changed: bool = True + while changed: + changed = False + for node in flow_graph.nodes: + if not isinstance(node, ConditionalNode): + continue + new_flow_graph = unroll_loop(flow_graph, node) + if new_flow_graph: + flow_graph = new_flow_graph + changed = True + break + return flow_graph + + +def munge_flowgraph(flow_graph: FlowGraph) -> FlowGraph: + return munge_unrolled_loops(flow_graph) diff --git a/src/main.py b/src/main.py index 6f65799a..324ea1d3 100644 --- a/src/main.py +++ b/src/main.py @@ -2,13 +2,14 @@ import sys from typing import List, Optional +from .c_types import TypeMap, build_typemap, dump_typemap from .error import DecompFailure -from .flow_graph import build_flowgraph, visualize_flowgraph +from .flow_graph import FlowGraph, build_flowgraph, visualize_flowgraph +from .flow_graph_munge import munge_flowgraph from .if_statements import get_function_text -from .options import Options, CodingStyle +from .options import CodingStyle, Options from .parse_file import Function, MIPSFile, Rodata, parse_file from .translate import translate_to_ast -from .c_types import TypeMap, build_typemap, dump_typemap def decompile_function( @@ -18,11 +19,16 @@ def decompile_function( print(function) print() + flowgraph: FlowGraph = build_flowgraph(function, rodata) + + if options.loop_rerolling: + flowgraph = munge_flowgraph(flowgraph) + if options.visualize_flowgraph: - visualize_flowgraph(build_flowgraph(function, rodata)) + visualize_flowgraph(flowgraph) return - function_info = translate_to_ast(function, options, rodata, typemap) + function_info = translate_to_ast(function, flowgraph, options, rodata, typemap) function_text = get_function_text(function_info, options) print(function_text) diff --git a/src/translate.py b/src/translate.py index 2c2b076b..8d541307 100644 --- a/src/translate.py +++ b/src/translate.py @@ -2651,7 +2651,11 @@ class FunctionInfo: def translate_to_ast( - function: Function, options: Options, rodata: Rodata, typemap: Optional[TypeMap] + function: Function, + flow_graph: FlowGraph, + options: Options, + rodata: Rodata, + typemap: Optional[TypeMap], ) -> FunctionInfo: """ Given a function, produce a FlowGraph that both contains control-flow @@ -2659,7 +2663,6 @@ def translate_to_ast( branch condition. """ # Initialize info about the function. - flow_graph: FlowGraph = build_flowgraph(function, rodata) start_node = flow_graph.entry_node() stack_info = get_stack_info(function, rodata, start_node, typemap) From b4ed8e04c566f023ce167dc01a0c9079f988a7e3 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Tue, 28 Apr 2020 19:22:49 -0700 Subject: [PATCH 15/54] Temp commit to see what used to work --- src/flow_graph.py | 2 +- src/flow_graph_munge.py | 24 ++++++++++++++--- tests/end_to_end/loop/irix-o2-out.c | 27 +++++++------------ tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 24 ++++++++--------- 4 files changed, 42 insertions(+), 35 deletions(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index e422695e..79b57535 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -29,7 +29,7 @@ class Block: block_info: Optional[Any] = None def add_block_info(self, block_info: Any) -> None: - assert self.block_info is None + assert self.block_info is None, breakpoint() self.block_info = block_info def clone(self) -> "Block": diff --git a/src/flow_graph_munge.py b/src/flow_graph_munge.py index 1863bafc..7597cd60 100644 --- a/src/flow_graph_munge.py +++ b/src/flow_graph_munge.py @@ -54,7 +54,16 @@ def unroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> Optional[FlowG modified_node_3.conditional_edge = modified_node_3 modified_node_2 = attr.evolve(node_2, successor=modified_node_3) modified_node_3.parents = [modified_node_2, modified_node_3] - node_7.parents = [modified_node_3] + # Need to delete deleted nodes ONLY (may have other still-relevant parents) + node_7.parents.append(modified_node_3) + if start in node_7.parents: + del node_7.parents[node_7.parents.index(start)] + if node_4 in node_7.parents: + del node_7.parents[node_7.parents.index(node_4)] + if node_5 in node_7.parents: + del node_7.parents[node_7.parents.index(node_5)] + if node_6 in node_7.parents: + del node_7.parents[node_7.parents.index(node_6)] new_instructions_1 = copy(node_1.block.instructions) branches = list( @@ -82,11 +91,18 @@ def unroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> Optional[FlowG assert len(branches) == 1 del new_instructions_0[new_instructions_0.index(branches[0])] new_block_0 = attr.evolve(start.block, instructions=new_instructions_0) - modified_node_0 = attr.evolve( - start.to_basic_node(successor=modified_node_1), block=new_block_0 - ) + modified_node_0 = start.to_basic_node(successor=modified_node_1) + modified_node_0.block = new_block_0 modified_node_1.parents = [modified_node_0] + # back to node_7: + # if start in node_7.parents: + # node_7.parents[node_7.parents.index(start)] = modified_node_0 + if node_1 in node_7.parents: + node_7.parents[node_7.parents.index(node_1)] = modified_node_1 + if node_2 in node_7.parents: + node_7.parents[node_7.parents.index(node_2)] = modified_node_2 + # TODO: does copy() work? new_nodes = copy(flow_graph.nodes) # TODO: do we need to reinterpret .parents? diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index 4076292f..9d949587 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -1,28 +1,19 @@ s32 test(void *arg0, s32 arg1) { - s32 temp_a3; s32 temp_v0; - s32 temp_v0_2; - void *temp_v1; void *phi_v1; s32 phi_v0; - void *phi_v1_2; - s32 phi_v0_2; - s32 phi_return; - s32 phi_v0_3; - phi_return = 0; - temp_a3 = arg1; - phi_v0_3 = 0; phi_v1 = arg0; phi_v0 = 0; - do +loop_3: + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + if (arg1 != temp_v0) { - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - } while ((temp_a3 != temp_v0)); -block_7: - return phi_return; + goto loop_3; + } + return temp_v0; } diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index ea489722..43d5aae1 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -19,7 +19,7 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) temp_v0 = ((0x80150000 + ((arg0 >> 0x18) * 4))->unk258 + (arg0 & 0xFFFFFF)) + 0x80000000; phi_return = temp_v0; - temp_t0 = arg1; + temp_t0 = arg1 & 3; phi_v0_2 = temp_v0; phi_a2 = arg2; phi_v1_2 = 0; @@ -27,17 +27,17 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) phi_a0 = (arg2 * 4) + &D_8015F668; phi_v1 = 0; phi_a2_2 = arg2; - do + *phi_a0 = phi_v0; + temp_v1 = phi_v1 + 1; + temp_v0_2 = phi_v0 + 0x10; + temp_a2 = phi_a2_2 + 1; + phi_v0 = temp_v0_2; + phi_a0 = phi_a0 + 4; + phi_v1 = temp_v1; + phi_a2_2 = temp_a2; + if (temp_t0 != temp_v1) { - *phi_a0 = phi_v0; - temp_v1 = phi_v1 + 1; - temp_v0_2 = phi_v0 + 0x10; - temp_a2 = phi_a2_2 + 1; - phi_v0 = temp_v0_2; - phi_a0 = phi_a0 + 4; - phi_v1 = temp_v1; - phi_a2_2 = temp_a2; - } while ((temp_t0 != temp_v1)); -block_7: + goto loop_3; + } return phi_return; } From 75ce71fdb6012d52e57b183b0ba5cd71ae7e2fab Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Tue, 28 Apr 2020 19:47:24 -0700 Subject: [PATCH 16/54] Fix everything --- src/flow_graph.py | 7 +++- src/flow_graph_munge.py | 20 +++++++-- tests/end_to_end/loop_nested/irix-o2-out.c | 42 ++++++------------- tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 31 +++----------- 4 files changed, 40 insertions(+), 60 deletions(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index 79b57535..ea94b808 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -29,7 +29,7 @@ class Block: block_info: Optional[Any] = None def add_block_info(self, block_info: Any) -> None: - assert self.block_info is None, breakpoint() + assert self.block_info is None self.block_info = block_info def clone(self) -> "Block": @@ -863,6 +863,11 @@ def ensure_fallthrough(nodes: List[Node]) -> None: def compute_dominators(nodes: List[Node]) -> None: + for node in nodes: + node.dominators = set() + node.immediate_dominator = None + node.immediately_dominates = [] + entry = nodes[0] entry.dominators = {entry} for n in nodes[1:]: diff --git a/src/flow_graph_munge.py b/src/flow_graph_munge.py index 7597cd60..c7d8a4d9 100644 --- a/src/flow_graph_munge.py +++ b/src/flow_graph_munge.py @@ -91,13 +91,25 @@ def unroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> Optional[FlowG assert len(branches) == 1 del new_instructions_0[new_instructions_0.index(branches[0])] new_block_0 = attr.evolve(start.block, instructions=new_instructions_0) - modified_node_0 = start.to_basic_node(successor=modified_node_1) - modified_node_0.block = new_block_0 + modified_node_0 = attr.evolve( + start.to_basic_node(successor=modified_node_1), block=new_block_0 + ) + modified_node_0.parents = start.parents + # Behold, the most confusing for-loop ever written. + for parent in start.parents: + if isinstance(parent, ConditionalNode): + if start.block.index == parent.fallthrough_edge.block.index: + parent.fallthrough_edge = modified_node_0 + if start.block.index == parent.conditional_edge.block.index: + parent.conditional_edge = modified_node_0 + elif isinstance(parent, BasicNode): + if start.block.index == parent.successor.block.index: + parent.successor = modified_node_0 modified_node_1.parents = [modified_node_0] # back to node_7: - # if start in node_7.parents: - # node_7.parents[node_7.parents.index(start)] = modified_node_0 + if start in node_7.parents: + node_7.parents[node_7.parents.index(start)] = modified_node_0 if node_1 in node_7.parents: node_7.parents[node_7.parents.index(node_1)] = modified_node_1 if node_2 in node_7.parents: diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index de5e6ef7..fa8cf8fb 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -1,52 +1,34 @@ s32 test(s32 arg0) { - s32 temp_a1; - s32 temp_a1_2; - s32 temp_t1; s32 temp_v0; s32 temp_v1; - s32 temp_v1_2; s32 phi_a3; - s32 phi_a1; s32 phi_v0; s32 phi_v1; - s32 phi_a1_2; s32 phi_v1_2; - s32 phi_v1_3; s32 phi_a2; - s32 phi_a3_2; - s32 phi_t0; - s32 phi_t1; - s32 phi_v1_4; - s32 phi_a2_2; - s32 phi_v1_5; phi_v0 = 0; phi_v1 = 0; - phi_v1_5 = 0; + phi_v1_2 = 0; if (arg0 > 0) { loop_1: - phi_v1_2 = phi_v1_5; - temp_t1 = arg0; - phi_a1_2 = 0; - phi_v1_3 = phi_v1_5; phi_a3 = 1; - phi_v1_4 = phi_v1_5; - phi_a2_2 = phi_v0 * 0; - do + phi_a2 = phi_v0 * 0; +loop_4: + temp_v1 = phi_v1_2 + phi_a2; + phi_a3 = phi_a3 + 1; + phi_v1_2 = temp_v1; + phi_a2 = phi_a2 + phi_v0; + if (arg0 != phi_a3) { - temp_a1 = phi_a3; - temp_v1 = phi_v1_4 + phi_a2_2; - phi_a3 = phi_a3 + 1; - phi_v1_4 = temp_v1; - phi_a2_2 = phi_a2_2 + phi_v0; - } while ((temp_t1 != phi_a3)); -block_8: + goto loop_4; + } temp_v0 = phi_v0 + 1; phi_v0 = temp_v0; - phi_v1 = phi_v1_2; - phi_v1_5 = phi_v1_2; + phi_v1 = temp_v1; + phi_v1_2 = temp_v1; if (temp_v0 != arg0) { goto loop_1; diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index 43d5aae1..bb08a876 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -1,43 +1,24 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) { - s32 temp_a2; - s32 temp_t0; s32 temp_v0; - s32 temp_v0_2; - s32 temp_v0_3; s32 temp_v1; - s32 temp_v1_2; s32 phi_v0; void *phi_a0; s32 phi_v1; - s32 phi_v0_2; - void *phi_a0_2; - s32 phi_v1_2; - s32 phi_return; - s32 phi_a2; - s32 phi_a2_2; - temp_v0 = ((0x80150000 + ((arg0 >> 0x18) * 4))->unk258 + (arg0 & 0xFFFFFF)) + 0x80000000; - phi_return = temp_v0; - temp_t0 = arg1 & 3; - phi_v0_2 = temp_v0; - phi_a2 = arg2; - phi_v1_2 = 0; - phi_v0 = temp_v0; + phi_v0 = ((0x80150000 + ((arg0 >> 0x18) * 4))->unk258 + (arg0 & 0xFFFFFF)) + 0x80000000; phi_a0 = (arg2 * 4) + &D_8015F668; phi_v1 = 0; - phi_a2_2 = arg2; +loop_3: *phi_a0 = phi_v0; temp_v1 = phi_v1 + 1; - temp_v0_2 = phi_v0 + 0x10; - temp_a2 = phi_a2_2 + 1; - phi_v0 = temp_v0_2; + temp_v0 = phi_v0 + 0x10; + phi_v0 = temp_v0; phi_a0 = phi_a0 + 4; phi_v1 = temp_v1; - phi_a2_2 = temp_a2; - if (temp_t0 != temp_v1) + if (arg1 != temp_v1) { goto loop_3; } - return phi_return; + return temp_v0; } From 1997ed49eea6009fe7f1be8c898f7c545f559a05 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Tue, 28 Apr 2020 19:49:00 -0700 Subject: [PATCH 17/54] CRLF -> LF --- src/flow_graph_munge.py | 308 ++++++++++++++++++++-------------------- 1 file changed, 154 insertions(+), 154 deletions(-) diff --git a/src/flow_graph_munge.py b/src/flow_graph_munge.py index c7d8a4d9..7c7b78e1 100644 --- a/src/flow_graph_munge.py +++ b/src/flow_graph_munge.py @@ -1,154 +1,154 @@ -import typing -from copy import copy -from typing import List, Optional, Tuple - -import attr - -from .flow_graph import BasicNode, ConditionalNode, FlowGraph, Node, compute_dominators -from .parse_instruction import Instruction - - -def unroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> Optional[FlowGraph]: - node_1 = start.fallthrough_edge - node_7 = start.conditional_edge - - if not isinstance(node_1, ConditionalNode): - return None - node_2 = node_1.fallthrough_edge - node_5 = node_1.conditional_edge - - if not isinstance(node_2, BasicNode): - return None - node_3 = node_2.successor - - if not ( - isinstance(node_3, ConditionalNode) - and node_3.is_loop() - and node_3.conditional_edge.block.index == node_3.block.index - ): - return None - node_4 = node_3.fallthrough_edge - - if not ( - isinstance(node_4, ConditionalNode) - and node_4.fallthrough_edge.block.index == node_5.block.index - and node_4.conditional_edge.block.index == node_7.block.index - ): - return None - - if not isinstance(node_5, BasicNode): - return None - node_6 = node_5.successor - - if not ( - isinstance(node_6, ConditionalNode) - and node_6.is_loop() - and node_6.conditional_edge.block.index == node_6.block.index - and node_6.fallthrough_edge.block.index == node_7.block.index - ): - return None - - modified_node_3 = attr.evolve( - node_3, fallthrough_edge=node_7, marked_to_remove_remainder_op=True, - ) - modified_node_3.conditional_edge = modified_node_3 - modified_node_2 = attr.evolve(node_2, successor=modified_node_3) - modified_node_3.parents = [modified_node_2, modified_node_3] - # Need to delete deleted nodes ONLY (may have other still-relevant parents) - node_7.parents.append(modified_node_3) - if start in node_7.parents: - del node_7.parents[node_7.parents.index(start)] - if node_4 in node_7.parents: - del node_7.parents[node_7.parents.index(node_4)] - if node_5 in node_7.parents: - del node_7.parents[node_7.parents.index(node_5)] - if node_6 in node_7.parents: - del node_7.parents[node_7.parents.index(node_6)] - - new_instructions_1 = copy(node_1.block.instructions) - branches = list( - filter(lambda instr: instr.is_branch_instruction(), new_instructions_1) - ) - assert len(branches) == 1 - del new_instructions_1[new_instructions_1.index(branches[0])] - # TODO: also remove & 3 here - andis = list(filter(lambda instr: instr.mnemonic == "andi", new_instructions_1)) - assert len(andis) == 1 - new_instructions_1[new_instructions_1.index(andis[0])] = Instruction( - mnemonic="move", args=[andis[0].args[0], andis[0].args[1]] - ) - - new_block_1 = attr.evolve(node_1.block, instructions=new_instructions_1) - modified_node_1 = attr.evolve( - node_1.to_basic_node(successor=modified_node_2), block=new_block_1 - ) - modified_node_2.parents = [modified_node_1] - - new_instructions_0 = copy(start.block.instructions) - branches = list( - filter(lambda instr: instr.is_branch_instruction(), new_instructions_0) - ) - assert len(branches) == 1 - del new_instructions_0[new_instructions_0.index(branches[0])] - new_block_0 = attr.evolve(start.block, instructions=new_instructions_0) - modified_node_0 = attr.evolve( - start.to_basic_node(successor=modified_node_1), block=new_block_0 - ) - modified_node_0.parents = start.parents - # Behold, the most confusing for-loop ever written. - for parent in start.parents: - if isinstance(parent, ConditionalNode): - if start.block.index == parent.fallthrough_edge.block.index: - parent.fallthrough_edge = modified_node_0 - if start.block.index == parent.conditional_edge.block.index: - parent.conditional_edge = modified_node_0 - elif isinstance(parent, BasicNode): - if start.block.index == parent.successor.block.index: - parent.successor = modified_node_0 - modified_node_1.parents = [modified_node_0] - - # back to node_7: - if start in node_7.parents: - node_7.parents[node_7.parents.index(start)] = modified_node_0 - if node_1 in node_7.parents: - node_7.parents[node_7.parents.index(node_1)] = modified_node_1 - if node_2 in node_7.parents: - node_7.parents[node_7.parents.index(node_2)] = modified_node_2 - - # TODO: does copy() work? - new_nodes = copy(flow_graph.nodes) - # TODO: do we need to reinterpret .parents? - new_nodes[new_nodes.index(node_3)] = modified_node_3 - new_nodes[new_nodes.index(node_2)] = modified_node_2 - new_nodes[new_nodes.index(node_1)] = modified_node_1 - new_nodes[new_nodes.index(start)] = modified_node_0 - - del new_nodes[new_nodes.index(node_4)] - del new_nodes[new_nodes.index(node_5)] - del new_nodes[new_nodes.index(node_6)] - - compute_dominators(new_nodes) - return attr.evolve(flow_graph, nodes=new_nodes) - - -def munge_unrolled_loops(flow_graph: FlowGraph) -> FlowGraph: - # TODO: This is horrible, probably not what I want. - # What if knocking out nodes 4, 5, 6 just reveals another - # set of nodes that look identical? We will incorrectly - # be merging two adjacent for-loops. - changed: bool = True - while changed: - changed = False - for node in flow_graph.nodes: - if not isinstance(node, ConditionalNode): - continue - new_flow_graph = unroll_loop(flow_graph, node) - if new_flow_graph: - flow_graph = new_flow_graph - changed = True - break - return flow_graph - - -def munge_flowgraph(flow_graph: FlowGraph) -> FlowGraph: - return munge_unrolled_loops(flow_graph) +import typing +from copy import copy +from typing import List, Optional, Tuple + +import attr + +from .flow_graph import BasicNode, ConditionalNode, FlowGraph, Node, compute_dominators +from .parse_instruction import Instruction + + +def unroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> Optional[FlowGraph]: + node_1 = start.fallthrough_edge + node_7 = start.conditional_edge + + if not isinstance(node_1, ConditionalNode): + return None + node_2 = node_1.fallthrough_edge + node_5 = node_1.conditional_edge + + if not isinstance(node_2, BasicNode): + return None + node_3 = node_2.successor + + if not ( + isinstance(node_3, ConditionalNode) + and node_3.is_loop() + and node_3.conditional_edge.block.index == node_3.block.index + ): + return None + node_4 = node_3.fallthrough_edge + + if not ( + isinstance(node_4, ConditionalNode) + and node_4.fallthrough_edge.block.index == node_5.block.index + and node_4.conditional_edge.block.index == node_7.block.index + ): + return None + + if not isinstance(node_5, BasicNode): + return None + node_6 = node_5.successor + + if not ( + isinstance(node_6, ConditionalNode) + and node_6.is_loop() + and node_6.conditional_edge.block.index == node_6.block.index + and node_6.fallthrough_edge.block.index == node_7.block.index + ): + return None + + modified_node_3 = attr.evolve( + node_3, fallthrough_edge=node_7, marked_to_remove_remainder_op=True, + ) + modified_node_3.conditional_edge = modified_node_3 + modified_node_2 = attr.evolve(node_2, successor=modified_node_3) + modified_node_3.parents = [modified_node_2, modified_node_3] + # Need to delete deleted nodes ONLY (may have other still-relevant parents) + node_7.parents.append(modified_node_3) + if start in node_7.parents: + del node_7.parents[node_7.parents.index(start)] + if node_4 in node_7.parents: + del node_7.parents[node_7.parents.index(node_4)] + if node_5 in node_7.parents: + del node_7.parents[node_7.parents.index(node_5)] + if node_6 in node_7.parents: + del node_7.parents[node_7.parents.index(node_6)] + + new_instructions_1 = copy(node_1.block.instructions) + branches = list( + filter(lambda instr: instr.is_branch_instruction(), new_instructions_1) + ) + assert len(branches) == 1 + del new_instructions_1[new_instructions_1.index(branches[0])] + # TODO: also remove & 3 here + andis = list(filter(lambda instr: instr.mnemonic == "andi", new_instructions_1)) + assert len(andis) == 1 + new_instructions_1[new_instructions_1.index(andis[0])] = Instruction( + mnemonic="move", args=[andis[0].args[0], andis[0].args[1]] + ) + + new_block_1 = attr.evolve(node_1.block, instructions=new_instructions_1) + modified_node_1 = attr.evolve( + node_1.to_basic_node(successor=modified_node_2), block=new_block_1 + ) + modified_node_2.parents = [modified_node_1] + + new_instructions_0 = copy(start.block.instructions) + branches = list( + filter(lambda instr: instr.is_branch_instruction(), new_instructions_0) + ) + assert len(branches) == 1 + del new_instructions_0[new_instructions_0.index(branches[0])] + new_block_0 = attr.evolve(start.block, instructions=new_instructions_0) + modified_node_0 = attr.evolve( + start.to_basic_node(successor=modified_node_1), block=new_block_0 + ) + modified_node_0.parents = start.parents + # Behold, the most confusing for-loop ever written. + for parent in start.parents: + if isinstance(parent, ConditionalNode): + if start.block.index == parent.fallthrough_edge.block.index: + parent.fallthrough_edge = modified_node_0 + if start.block.index == parent.conditional_edge.block.index: + parent.conditional_edge = modified_node_0 + elif isinstance(parent, BasicNode): + if start.block.index == parent.successor.block.index: + parent.successor = modified_node_0 + modified_node_1.parents = [modified_node_0] + + # back to node_7: + if start in node_7.parents: + node_7.parents[node_7.parents.index(start)] = modified_node_0 + if node_1 in node_7.parents: + node_7.parents[node_7.parents.index(node_1)] = modified_node_1 + if node_2 in node_7.parents: + node_7.parents[node_7.parents.index(node_2)] = modified_node_2 + + # TODO: does copy() work? + new_nodes = copy(flow_graph.nodes) + # TODO: do we need to reinterpret .parents? + new_nodes[new_nodes.index(node_3)] = modified_node_3 + new_nodes[new_nodes.index(node_2)] = modified_node_2 + new_nodes[new_nodes.index(node_1)] = modified_node_1 + new_nodes[new_nodes.index(start)] = modified_node_0 + + del new_nodes[new_nodes.index(node_4)] + del new_nodes[new_nodes.index(node_5)] + del new_nodes[new_nodes.index(node_6)] + + compute_dominators(new_nodes) + return attr.evolve(flow_graph, nodes=new_nodes) + + +def munge_unrolled_loops(flow_graph: FlowGraph) -> FlowGraph: + # TODO: This is horrible, probably not what I want. + # What if knocking out nodes 4, 5, 6 just reveals another + # set of nodes that look identical? We will incorrectly + # be merging two adjacent for-loops. + changed: bool = True + while changed: + changed = False + for node in flow_graph.nodes: + if not isinstance(node, ConditionalNode): + continue + new_flow_graph = unroll_loop(flow_graph, node) + if new_flow_graph: + flow_graph = new_flow_graph + changed = True + break + return flow_graph + + +def munge_flowgraph(flow_graph: FlowGraph) -> FlowGraph: + return munge_unrolled_loops(flow_graph) From 7919a49c3fca7f766e616082f0f088ac04b14054 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 29 Apr 2020 11:14:55 -0700 Subject: [PATCH 18/54] Remove the code from if_statements that was moved --- src/if_statements.py | 224 ------------------ .../loop/irix-o2-no-andor-flags.txt | 1 + ...no-reroll-out.c => irix-o2-no-andor-out.c} | 0 ...irix-o2-no-reroll.s => irix-o2-no-andor.s} | 0 .../loop/irix-o2-no-reroll-flags.txt | 1 - 5 files changed, 1 insertion(+), 225 deletions(-) create mode 100644 tests/end_to_end/loop/irix-o2-no-andor-flags.txt rename tests/end_to_end/loop/{irix-o2-no-reroll-out.c => irix-o2-no-andor-out.c} (100%) rename tests/end_to_end/loop/{irix-o2-no-reroll.s => irix-o2-no-andor.s} (100%) delete mode 100644 tests/end_to_end/loop/irix-o2-no-reroll-flags.txt diff --git a/src/if_statements.py b/src/if_statements.py index 08eb68c1..397810d8 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -593,214 +593,6 @@ def pattern_match_against_simple_do_while_loop( return (start, should_loop, node_2) -def pattern_match_against_unrolled_while_loop( - context: Context, start: ConditionalNode, indent: int -) -> Optional[ - Tuple[Node, Union[IfElseStatement, Tuple[Node, Node, DoWhileLoop]], Node] -]: - """ - A common-case for-loop is: - - ```c - for (i = 0; i < length; i++) { - // code - } - ``` - - which, at least for IRIX -O2, is compiled roughly like: - - ```c - for (i = 0; i < (length % 4); i++) { - // code in node 3 - } - for (i = (length % 4); i < length; i += 4) { - // code, repeated for i through i + 3 - // aka code in node 6 - } - ``` - - which is *actually* compiled exactly like this: - - ```c - [node_0] - if ([node_0.condition]) { - [node_1] - if (![node_1.condition]) { - [node_2] - while ([node_3.condition]) { - [node_3] - } - [node_4] - if ([node_4.condition]) { - goto label_7 - } - } - [node_5] - while ([node_6.condition]) { - [node_6] - } - } - label_7: - [node_7] - ``` - - This function aims to detect such loops, then emit them in a useful way. - - If `options.loop_rerolling` is disabled, the bottom, more literal - interpretation, is emitted. - - Otherwise, the following is emitted: - ```c - [node_0] - [node_1_MODIFIED] - [node_2] - while ([node_3.condition]) { - [node_3] - } - [node_7] - ``` - where the MODIFIED suffix indicates that we will not be using the - above-mentioned `length % 4`, but instead just `length`. Note that - this interpretation discards all of nodes 4, 5, and 6, as well as - several needless short-circuits. - - As you can see, this drastically modified output may be incorrect, - and can be disabled using the --no-reroll flag. - """ - - node_1 = start.fallthrough_edge - node_7 = start.conditional_edge - - if not isinstance(node_1, ConditionalNode): - return None - node_2 = node_1.fallthrough_edge - node_5 = node_1.conditional_edge - - if not isinstance(node_2, BasicNode): - return None - node_3 = node_2.successor - - if not ( - isinstance(node_3, ConditionalNode) - and node_3.is_loop() - and node_3.conditional_edge.block.index == node_3.block.index - ): - return None - node_4 = node_3.fallthrough_edge - - if not ( - isinstance(node_4, ConditionalNode) - and node_4.fallthrough_edge.block.index == node_5.block.index - and node_4.conditional_edge.block.index == node_7.block.index - ): - return None - - if not isinstance(node_5, BasicNode): - return None - node_6 = node_5.successor - - if not ( - isinstance(node_6, ConditionalNode) - and node_6.is_loop() - and node_6.conditional_edge.block.index == node_6.block.index - and node_6.fallthrough_edge.block.index == node_7.block.index - ): - return None - - assert isinstance(start.block.block_info, BlockInfo) - assert isinstance(node_1.block.block_info, BlockInfo) - assert isinstance(node_3.block.block_info, BlockInfo) - assert isinstance(node_4.block.block_info, BlockInfo) - assert isinstance(node_6.block.block_info, BlockInfo) - assert start.block.block_info.branch_condition - assert node_1.block.block_info.branch_condition - assert node_3.block.block_info.branch_condition - assert node_4.block.block_info.branch_condition - assert node_6.block.block_info.branch_condition - - main_body = Body(False, []) - emit_node(context, node_1, main_body, indent + 4) - - first_loop_metabody = Body(False, []) - emit_node(context, node_2, first_loop_metabody, indent + 8) - first_loop_body = Body(False, []) - emit_node(context, node_3, first_loop_body, indent + 12) - first_loop_while = DoWhileLoop( - indent + 8, - context.options.coding_style, - first_loop_body, - node_3.block.block_info.branch_condition, - ) - first_loop_metabody.add_statement(first_loop_while) - emit_node(context, node_4, first_loop_metabody, indent + 8) - first_loop_metabody.add_statement( - IfElseStatement( - node_4.block.block_info.branch_condition, - indent + 8, - context.options.coding_style, - Body(False, [create_goto(context, node_7, indent + 12)]), - ) - ) - - first_loop_if = IfElseStatement( - node_1.block.block_info.branch_condition.negated(), - indent + 4, - context.options.coding_style, - first_loop_metabody, - ) - main_body.add_statement(first_loop_if) - emit_node(context, node_5, main_body, indent + 4) - second_loop_body = Body(False, []) - emit_node(context, node_6, second_loop_body, indent + 8) - main_body.add_statement( - DoWhileLoop( - indent + 4, - context.options.coding_style, - second_loop_body, - node_6.block.block_info.branch_condition, - ), - ) - - should_loop = IfElseStatement( - start.block.block_info.branch_condition.negated(), - indent, - context.options.coding_style, - main_body, - ) - if not context.options.loop_rerolling: - return (start, should_loop, node_7) - - # [node_0] - # [node_1_MODIFIED] - # [node_2] - # while ([node_3.condition]) { - # [node_3] - # } - # [node_7] - to_write = node_1.block.block_info.to_write - original_remainder_taker = to_write[0] - assert isinstance(original_remainder_taker, EvalOnceStmt) - original_expr = original_remainder_taker.expr - assert isinstance(original_expr, EvalOnceExpr) - original_binop = original_expr.wrapped_expr - assert isinstance(original_binop, BinaryOp) - # !!! This is the only line that actually does anything !!! - # This is what replaces the "& 3" with nothing. - new_expr = attr.evolve(original_expr, wrapped_expr=original_binop.left) - no_taking_remainder = attr.evolve(original_remainder_taker, expr=new_expr) - new_to_write = [no_taking_remainder, *to_write[1:]] - new_block_info = attr.evolve(node_1.block.block_info, to_write=new_to_write) - new_block = attr.evolve(node_1.block) - new_block.block_info = new_block_info - node_1_modified = attr.evolve(node_1, block=new_block) - dedented_while_body = Body(False, []) - emit_node(context, node_3, dedented_while_body, indent + 4) - dedented_while = attr.evolve( - first_loop_while, indent=indent, body=dedented_while_body - ) - return (start, (node_1_modified, node_2, dedented_while), node_7) - - def build_flowgraph_between( context: Context, start: Node, end: Node, indent: int ) -> Body: @@ -892,22 +684,6 @@ def build_flowgraph_between( curr_start = curr_end continue - # Same thing for giant unrolled loops: - unrolled_loop = pattern_match_against_unrolled_while_loop( - context, curr_start, indent - ) - if unrolled_loop: - (_, statement, curr_end) = unrolled_loop - if isinstance(statement, IfElseStatement): - body.add_if_else(statement) - else: - (node1, node2, do_while) = statement - emit_node(context, node1, body, indent) - emit_node(context, node2, body, indent) - body.add_do_while_loop(do_while) - curr_start = curr_end - continue - # A ConditionalNode means we need to find the next articulation # node. This means we need to find the "immediate postdominator" # of the current node, where "postdominator" means we have to go diff --git a/tests/end_to_end/loop/irix-o2-no-andor-flags.txt b/tests/end_to_end/loop/irix-o2-no-andor-flags.txt new file mode 100644 index 00000000..751e7042 --- /dev/null +++ b/tests/end_to_end/loop/irix-o2-no-andor-flags.txt @@ -0,0 +1 @@ +--no-andor diff --git a/tests/end_to_end/loop/irix-o2-no-reroll-out.c b/tests/end_to_end/loop/irix-o2-no-andor-out.c similarity index 100% rename from tests/end_to_end/loop/irix-o2-no-reroll-out.c rename to tests/end_to_end/loop/irix-o2-no-andor-out.c diff --git a/tests/end_to_end/loop/irix-o2-no-reroll.s b/tests/end_to_end/loop/irix-o2-no-andor.s similarity index 100% rename from tests/end_to_end/loop/irix-o2-no-reroll.s rename to tests/end_to_end/loop/irix-o2-no-andor.s diff --git a/tests/end_to_end/loop/irix-o2-no-reroll-flags.txt b/tests/end_to_end/loop/irix-o2-no-reroll-flags.txt deleted file mode 100644 index a0847c20..00000000 --- a/tests/end_to_end/loop/irix-o2-no-reroll-flags.txt +++ /dev/null @@ -1 +0,0 @@ ---no-reroll From f637685d1d548e5afadf2343a4e2a0c514d42a73 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 29 Apr 2020 21:15:10 -0700 Subject: [PATCH 19/54] More CRLF --- tests/end_to_end/mk64_unknown_1/irix-o2.s | 104 +++++++++++----------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2.s b/tests/end_to_end/mk64_unknown_1/irix-o2.s index ffe00539..2d014540 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2.s +++ b/tests/end_to_end/mk64_unknown_1/irix-o2.s @@ -1,53 +1,53 @@ -.set noat # allow manual use of $at -.set noreorder # don't insert nops after branches - - -glabel test -/* 11837C 802AED6C 00047602 */ srl $t6, $a0, 0x18 -/* 118380 802AED70 000E7880 */ sll $t7, $t6, 2 -/* 118384 802AED74 3C188015 */ lui $t8, 0x8015 -/* 118388 802AED78 030FC021 */ addu $t8, $t8, $t7 -/* 11838C 802AED7C 3C0100FF */ lui $at, (0x00FFFFFF >> 16) # lui $at, 0xff -/* 118390 802AED80 8F180258 */ lw $t8, 0x258($t8) -/* 118394 802AED84 3421FFFF */ ori $at, (0x00FFFFFF & 0xFFFF) # ori $at, $at, 0xffff -/* 118398 802AED88 0081C824 */ and $t9, $a0, $at -/* 11839C 802AED8C 3C018000 */ lui $at, 0x8000 -/* 1183A0 802AED90 03191021 */ addu $v0, $t8, $t9 -/* 1183A4 802AED94 00411021 */ addu $v0, $v0, $at -/* 1183A8 802AED98 10A0001E */ beqz $a1, .L802AEE14 -/* 1183AC 802AED9C 00001825 */ move $v1, $zero -/* 1183B0 802AEDA0 30A80003 */ andi $t0, $a1, 3 -/* 1183B4 802AEDA4 1100000C */ beqz $t0, .L802AEDD8 -/* 1183B8 802AEDA8 01003825 */ move $a3, $t0 -/* 1183BC 802AEDAC 3C0A8016 */ lui $t2, %hi(D_8015F668) # $t2, 0x8016 -/* 1183C0 802AEDB0 254AF668 */ addiu $t2, %lo(D_8015F668) # addiu $t2, $t2, -0x998 -/* 1183C4 802AEDB4 00064880 */ sll $t1, $a2, 2 -/* 1183C8 802AEDB8 012A2021 */ addu $a0, $t1, $t2 -.L802AEDBC: -/* 1183CC 802AEDBC AC820000 */ sw $v0, ($a0) -/* 1183D0 802AEDC0 24630001 */ addiu $v1, $v1, 1 -/* 1183D4 802AEDC4 24420010 */ addiu $v0, $v0, 0x10 -/* 1183D8 802AEDC8 24C60001 */ addiu $a2, $a2, 1 -/* 1183DC 802AEDCC 14E3FFFB */ bne $a3, $v1, .L802AEDBC -/* 1183E0 802AEDD0 24840004 */ addiu $a0, $a0, 4 -/* 1183E4 802AEDD4 1065000F */ beq $v1, $a1, .L802AEE14 -.L802AEDD8: -/* 1183E8 802AEDD8 3C0C8016 */ lui $t4, %hi(D_8015F668) # $t4, 0x8016 -/* 1183EC 802AEDDC 258CF668 */ addiu $t4, %lo(D_8015F668) # addiu $t4, $t4, -0x998 -/* 1183F0 802AEDE0 00065880 */ sll $t3, $a2, 2 -/* 1183F4 802AEDE4 016C2021 */ addu $a0, $t3, $t4 -.L802AEDE8: -/* 1183F8 802AEDE8 AC820000 */ sw $v0, ($a0) -/* 1183FC 802AEDEC 24420010 */ addiu $v0, $v0, 0x10 -/* 118400 802AEDF0 AC820004 */ sw $v0, 4($a0) -/* 118404 802AEDF4 24420010 */ addiu $v0, $v0, 0x10 -/* 118408 802AEDF8 AC820008 */ sw $v0, 8($a0) -/* 11840C 802AEDFC 24420010 */ addiu $v0, $v0, 0x10 -/* 118410 802AEE00 AC82000C */ sw $v0, 0xc($a0) -/* 118414 802AEE04 24630004 */ addiu $v1, $v1, 4 -/* 118418 802AEE08 24420010 */ addiu $v0, $v0, 0x10 -/* 11841C 802AEE0C 1465FFF6 */ bne $v1, $a1, .L802AEDE8 -/* 118420 802AEE10 24840010 */ addiu $a0, $a0, 0x10 -.L802AEE14: -/* 118424 802AEE14 03E00008 */ jr $ra +.set noat # allow manual use of $at +.set noreorder # don't insert nops after branches + + +glabel test +/* 11837C 802AED6C 00047602 */ srl $t6, $a0, 0x18 +/* 118380 802AED70 000E7880 */ sll $t7, $t6, 2 +/* 118384 802AED74 3C188015 */ lui $t8, 0x8015 +/* 118388 802AED78 030FC021 */ addu $t8, $t8, $t7 +/* 11838C 802AED7C 3C0100FF */ lui $at, (0x00FFFFFF >> 16) # lui $at, 0xff +/* 118390 802AED80 8F180258 */ lw $t8, 0x258($t8) +/* 118394 802AED84 3421FFFF */ ori $at, (0x00FFFFFF & 0xFFFF) # ori $at, $at, 0xffff +/* 118398 802AED88 0081C824 */ and $t9, $a0, $at +/* 11839C 802AED8C 3C018000 */ lui $at, 0x8000 +/* 1183A0 802AED90 03191021 */ addu $v0, $t8, $t9 +/* 1183A4 802AED94 00411021 */ addu $v0, $v0, $at +/* 1183A8 802AED98 10A0001E */ beqz $a1, .L802AEE14 +/* 1183AC 802AED9C 00001825 */ move $v1, $zero +/* 1183B0 802AEDA0 30A80003 */ andi $t0, $a1, 3 +/* 1183B4 802AEDA4 1100000C */ beqz $t0, .L802AEDD8 +/* 1183B8 802AEDA8 01003825 */ move $a3, $t0 +/* 1183BC 802AEDAC 3C0A8016 */ lui $t2, %hi(D_8015F668) # $t2, 0x8016 +/* 1183C0 802AEDB0 254AF668 */ addiu $t2, %lo(D_8015F668) # addiu $t2, $t2, -0x998 +/* 1183C4 802AEDB4 00064880 */ sll $t1, $a2, 2 +/* 1183C8 802AEDB8 012A2021 */ addu $a0, $t1, $t2 +.L802AEDBC: +/* 1183CC 802AEDBC AC820000 */ sw $v0, ($a0) +/* 1183D0 802AEDC0 24630001 */ addiu $v1, $v1, 1 +/* 1183D4 802AEDC4 24420010 */ addiu $v0, $v0, 0x10 +/* 1183D8 802AEDC8 24C60001 */ addiu $a2, $a2, 1 +/* 1183DC 802AEDCC 14E3FFFB */ bne $a3, $v1, .L802AEDBC +/* 1183E0 802AEDD0 24840004 */ addiu $a0, $a0, 4 +/* 1183E4 802AEDD4 1065000F */ beq $v1, $a1, .L802AEE14 +.L802AEDD8: +/* 1183E8 802AEDD8 3C0C8016 */ lui $t4, %hi(D_8015F668) # $t4, 0x8016 +/* 1183EC 802AEDDC 258CF668 */ addiu $t4, %lo(D_8015F668) # addiu $t4, $t4, -0x998 +/* 1183F0 802AEDE0 00065880 */ sll $t3, $a2, 2 +/* 1183F4 802AEDE4 016C2021 */ addu $a0, $t3, $t4 +.L802AEDE8: +/* 1183F8 802AEDE8 AC820000 */ sw $v0, ($a0) +/* 1183FC 802AEDEC 24420010 */ addiu $v0, $v0, 0x10 +/* 118400 802AEDF0 AC820004 */ sw $v0, 4($a0) +/* 118404 802AEDF4 24420010 */ addiu $v0, $v0, 0x10 +/* 118408 802AEDF8 AC820008 */ sw $v0, 8($a0) +/* 11840C 802AEDFC 24420010 */ addiu $v0, $v0, 0x10 +/* 118410 802AEE00 AC82000C */ sw $v0, 0xc($a0) +/* 118414 802AEE04 24630004 */ addiu $v1, $v1, 4 +/* 118418 802AEE08 24420010 */ addiu $v0, $v0, 0x10 +/* 11841C 802AEE0C 1465FFF6 */ bne $v1, $a1, .L802AEDE8 +/* 118420 802AEE10 24840010 */ addiu $a0, $a0, 0x10 +.L802AEE14: +/* 118424 802AEE14 03E00008 */ jr $ra /* 118428 802AEE18 00000000 */ nop \ No newline at end of file From 2d58683e5df6579815c264902a01035e92c3317a Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 29 Apr 2020 21:17:41 -0700 Subject: [PATCH 20/54] Another flip-flop, using --no-reroll for a test --- .../loop/irix-o2-no-andor-flags.txt | 1 - tests/end_to_end/loop/irix-o2-no-andor-out.c | 54 ------------------- .../loop/irix-o2-no-reroll-flags.txt | 1 + tests/end_to_end/loop/irix-o2-no-reroll-out.c | 1 + ...irix-o2-no-andor.s => irix-o2-no-reroll.s} | 0 5 files changed, 2 insertions(+), 55 deletions(-) delete mode 100644 tests/end_to_end/loop/irix-o2-no-andor-flags.txt delete mode 100644 tests/end_to_end/loop/irix-o2-no-andor-out.c create mode 100644 tests/end_to_end/loop/irix-o2-no-reroll-flags.txt create mode 100644 tests/end_to_end/loop/irix-o2-no-reroll-out.c rename tests/end_to_end/loop/{irix-o2-no-andor.s => irix-o2-no-reroll.s} (100%) diff --git a/tests/end_to_end/loop/irix-o2-no-andor-flags.txt b/tests/end_to_end/loop/irix-o2-no-andor-flags.txt deleted file mode 100644 index 751e7042..00000000 --- a/tests/end_to_end/loop/irix-o2-no-andor-flags.txt +++ /dev/null @@ -1 +0,0 @@ ---no-andor diff --git a/tests/end_to_end/loop/irix-o2-no-andor-out.c b/tests/end_to_end/loop/irix-o2-no-andor-out.c deleted file mode 100644 index 7b8dd9da..00000000 --- a/tests/end_to_end/loop/irix-o2-no-andor-out.c +++ /dev/null @@ -1,54 +0,0 @@ -s32 test(void *arg0, s32 arg1) -{ - s32 temp_a3; - s32 temp_v0; - s32 temp_v0_2; - void *temp_v1; - void *phi_v1; - s32 phi_v0; - void *phi_v1_2; - s32 phi_v0_2; - s32 phi_return; - s32 phi_v0_3; - - phi_return = 0; - if (arg1 > 0) - { - temp_a3 = arg1 & 3; - phi_v0_3 = 0; - if (temp_a3 != 0) - { - phi_v1 = arg0; - phi_v0 = 0; - do - { - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - } while ((temp_a3 != temp_v0)); - phi_return = temp_v0; - phi_v0_3 = temp_v0; - if (temp_v0 == arg1) - { - goto block_7; - } - } - phi_v1_2 = arg0 + phi_v0_3; - phi_v0_2 = phi_v0_3; - do - { - temp_v0_2 = phi_v0_2 + 4; - phi_v1_2->unk1 = (u8)0; - phi_v1_2->unk2 = (u8)0; - phi_v1_2->unk3 = (u8)0; - temp_v1 = phi_v1_2 + 4; - temp_v1->unk-4 = (u8)0; - phi_v1_2 = temp_v1; - phi_v0_2 = temp_v0_2; - phi_return = temp_v0_2; - } while ((temp_v0_2 != arg1)); - } -block_7: - return phi_return; -} diff --git a/tests/end_to_end/loop/irix-o2-no-reroll-flags.txt b/tests/end_to_end/loop/irix-o2-no-reroll-flags.txt new file mode 100644 index 00000000..a0847c20 --- /dev/null +++ b/tests/end_to_end/loop/irix-o2-no-reroll-flags.txt @@ -0,0 +1 @@ +--no-reroll diff --git a/tests/end_to_end/loop/irix-o2-no-reroll-out.c b/tests/end_to_end/loop/irix-o2-no-reroll-out.c new file mode 100644 index 00000000..1cb4f301 --- /dev/null +++ b/tests/end_to_end/loop/irix-o2-no-reroll-out.c @@ -0,0 +1 @@ +CRASHED diff --git a/tests/end_to_end/loop/irix-o2-no-andor.s b/tests/end_to_end/loop/irix-o2-no-reroll.s similarity index 100% rename from tests/end_to_end/loop/irix-o2-no-andor.s rename to tests/end_to_end/loop/irix-o2-no-reroll.s From 8a5bf38afb21bd0d17d90dbb7ce35e01ddba0614 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Thu, 30 Apr 2020 22:59:42 -0700 Subject: [PATCH 21/54] Refactor all spaghetti AND fix a bug --- src/flow_graph.py | 50 +++++- src/flow_graph_munge.py | 170 +++++++----------- src/translate.py | 9 +- tests/end_to_end/loop/irix-o2-out.c | 26 +-- tests/end_to_end/loop_nested/irix-o2-out.c | 32 ++-- tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 34 ++-- 6 files changed, 177 insertions(+), 144 deletions(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index ea94b808..b0c2b641 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -1,3 +1,4 @@ +import abc import copy import typing from typing import Any, Dict, Iterator, List, Optional, Set, Tuple, Union @@ -512,11 +513,16 @@ def is_loop_edge(node: "Node", edge: "Node") -> bool: return edge.block.index <= node.block.index +# TODO: Nodes need to be frozen. +# We use dictionaries and sets of nodes throughout the codebase, +# and it would be helpful to define __eq__ using `self.block.index`. +# This would also keep code from silently modifying Nodes. +# It just makes more sense. @attr.s(eq=False) -class BaseNode: +class BaseNode(abc.ABC): block: Block = attr.ib() emit_goto: bool = attr.ib() - parents: List["Node"] = attr.ib(init=False, factory=list) + parents: Set["Node"] = attr.ib(init=False, factory=set) dominators: Set["Node"] = attr.ib(init=False, factory=set) immediate_dominator: Optional["Node"] = attr.ib(init=False, default=None) immediately_dominates: List["Node"] = attr.ib(init=False, factory=list) @@ -530,7 +536,19 @@ def to_basic_node(self, successor: "Node") -> "BasicNode": return new_node def add_parent(self, parent: "Node") -> None: - self.parents.append(parent) + self.parents.add(parent) + + def remove_parent(self, parent: "Node") -> None: + self.parents.remove(parent) + + def replace_parent(self, replace_this: "Node", with_this: "Node") -> None: + if replace_this in self.parents: + self.parents.remove(replace_this) + self.parents.add(with_this) + + @abc.abstractmethod + def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: + ... def name(self) -> str: return str(self.block.index) @@ -540,6 +558,11 @@ def name(self) -> str: class BasicNode(BaseNode): successor: "Node" = attr.ib() + def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: + if self.successor is replace_this: + self.successor = with_this + with_this.add_parent(self) + def is_loop(self) -> bool: return is_loop_edge(self, self.successor) @@ -558,7 +581,13 @@ class ConditionalNode(BaseNode): conditional_edge: "Node" = attr.ib() fallthrough_edge: "Node" = attr.ib() - marked_to_remove_remainder_op: bool = attr.ib(default=False) + def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: + if self.conditional_edge is replace_this: + self.conditional_edge = with_this + with_this.add_parent(self) + if self.fallthrough_edge is replace_this: + self.fallthrough_edge = with_this + with_this.add_parent(self) def is_loop(self) -> bool: return is_loop_edge(self, self.conditional_edge) @@ -580,6 +609,9 @@ def __str__(self) -> str: class ReturnNode(BaseNode): index: int = attr.ib() + def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: + pass + def name(self) -> str: name = super().name() return name if self.is_real() else f"{name}.{self.index}" @@ -595,6 +627,16 @@ def __str__(self) -> str: class SwitchNode(BaseNode): cases: List["Node"] = attr.ib() + def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: + new_cases: List["Node"] = [] + for case in self.cases: + if case is replace_this: + new_cases.append(with_this) + with_this.add_parent(self) + else: + new_cases.append(case) + self.cases = new_cases + def __str__(self) -> str: targets = ", ".join(str(c.block.index) for c in self.cases) return f"{self.block}\n# {self.block.index} -> {targets}" diff --git a/src/flow_graph_munge.py b/src/flow_graph_munge.py index 7c7b78e1..38e16765 100644 --- a/src/flow_graph_munge.py +++ b/src/flow_graph_munge.py @@ -8,144 +8,112 @@ from .parse_instruction import Instruction -def unroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> Optional[FlowGraph]: +def replace_node_references( + flow_graph: FlowGraph, replace_this: Node, with_this: Node, replace_parent: bool +) -> None: + for node_to_modify in flow_graph.nodes: + node_to_modify.replace_any_children(replace_this, with_this) + if replace_this in node_to_modify.parents: + if replace_parent: + node_to_modify.replace_parent(replace_this, with_this) + else: + node_to_modify.remove_parent(replace_this) + compute_dominators(flow_graph.nodes) + + +def remove_node(flow_graph: FlowGraph, to_delete: Node, new_child: Node) -> None: + flow_graph.nodes.remove(to_delete) + replace_node_references(flow_graph, to_delete, new_child, False) + + +def replace_node(flow_graph: FlowGraph, replace_this: Node, with_this: Node) -> None: + replacement_index = flow_graph.nodes.index(replace_this) + flow_graph.nodes[replacement_index] = with_this + replace_node_references(flow_graph, replace_this, with_this, True) + + +def unroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: node_1 = start.fallthrough_edge node_7 = start.conditional_edge if not isinstance(node_1, ConditionalNode): - return None + return False node_2 = node_1.fallthrough_edge node_5 = node_1.conditional_edge if not isinstance(node_2, BasicNode): - return None + return False node_3 = node_2.successor if not ( isinstance(node_3, ConditionalNode) and node_3.is_loop() - and node_3.conditional_edge.block.index == node_3.block.index + and node_3.conditional_edge is node_3 ): - return None + return False node_4 = node_3.fallthrough_edge if not ( isinstance(node_4, ConditionalNode) - and node_4.fallthrough_edge.block.index == node_5.block.index - and node_4.conditional_edge.block.index == node_7.block.index + and node_4.fallthrough_edge is node_5 + and node_4.conditional_edge is node_7 ): - return None + return False if not isinstance(node_5, BasicNode): - return None + return False node_6 = node_5.successor if not ( isinstance(node_6, ConditionalNode) and node_6.is_loop() - and node_6.conditional_edge.block.index == node_6.block.index - and node_6.fallthrough_edge.block.index == node_7.block.index + and node_6.conditional_edge is node_6 + and node_6.fallthrough_edge is node_7 ): - return None - - modified_node_3 = attr.evolve( - node_3, fallthrough_edge=node_7, marked_to_remove_remainder_op=True, - ) - modified_node_3.conditional_edge = modified_node_3 - modified_node_2 = attr.evolve(node_2, successor=modified_node_3) - modified_node_3.parents = [modified_node_2, modified_node_3] - # Need to delete deleted nodes ONLY (may have other still-relevant parents) - node_7.parents.append(modified_node_3) - if start in node_7.parents: - del node_7.parents[node_7.parents.index(start)] - if node_4 in node_7.parents: - del node_7.parents[node_7.parents.index(node_4)] - if node_5 in node_7.parents: - del node_7.parents[node_7.parents.index(node_5)] - if node_6 in node_7.parents: - del node_7.parents[node_7.parents.index(node_6)] - - new_instructions_1 = copy(node_1.block.instructions) - branches = list( - filter(lambda instr: instr.is_branch_instruction(), new_instructions_1) - ) - assert len(branches) == 1 - del new_instructions_1[new_instructions_1.index(branches[0])] - # TODO: also remove & 3 here - andis = list(filter(lambda instr: instr.mnemonic == "andi", new_instructions_1)) - assert len(andis) == 1 - new_instructions_1[new_instructions_1.index(andis[0])] = Instruction( - mnemonic="move", args=[andis[0].args[0], andis[0].args[1]] - ) - - new_block_1 = attr.evolve(node_1.block, instructions=new_instructions_1) - modified_node_1 = attr.evolve( - node_1.to_basic_node(successor=modified_node_2), block=new_block_1 - ) - modified_node_2.parents = [modified_node_1] - - new_instructions_0 = copy(start.block.instructions) - branches = list( - filter(lambda instr: instr.is_branch_instruction(), new_instructions_0) - ) - assert len(branches) == 1 - del new_instructions_0[new_instructions_0.index(branches[0])] - new_block_0 = attr.evolve(start.block, instructions=new_instructions_0) - modified_node_0 = attr.evolve( - start.to_basic_node(successor=modified_node_1), block=new_block_0 + return False + + def modify_node_1_instructions(instructions: List[Instruction]) -> bool: + # First, we check that the node has the instructions we + # think it has. + branches = [instr for instr in instructions if instr.is_branch_instruction()] + if len(branches) != 1: + return False + andi_instrs = [instr for instr in instructions if instr.mnemonic == "andi"] + if len(andi_instrs) != 1: + return False + # We are now free to modify the instructions, as we have verified + # that this node fits the criteria. + instructions.remove(branches[0]) + andi = andi_instrs[0] + move = Instruction("move", [andi.args[0], andi.args[1]]) + instructions[instructions.index(andi)] = move + return True + + if not modify_node_1_instructions(node_1.block.instructions): + return False + + new_node_1 = node_1.to_basic_node( + successor=node_2 # node_2 doesn't know it's a parent yet ) - modified_node_0.parents = start.parents - # Behold, the most confusing for-loop ever written. - for parent in start.parents: - if isinstance(parent, ConditionalNode): - if start.block.index == parent.fallthrough_edge.block.index: - parent.fallthrough_edge = modified_node_0 - if start.block.index == parent.conditional_edge.block.index: - parent.conditional_edge = modified_node_0 - elif isinstance(parent, BasicNode): - if start.block.index == parent.successor.block.index: - parent.successor = modified_node_0 - modified_node_1.parents = [modified_node_0] - - # back to node_7: - if start in node_7.parents: - node_7.parents[node_7.parents.index(start)] = modified_node_0 - if node_1 in node_7.parents: - node_7.parents[node_7.parents.index(node_1)] = modified_node_1 - if node_2 in node_7.parents: - node_7.parents[node_7.parents.index(node_2)] = modified_node_2 - - # TODO: does copy() work? - new_nodes = copy(flow_graph.nodes) - # TODO: do we need to reinterpret .parents? - new_nodes[new_nodes.index(node_3)] = modified_node_3 - new_nodes[new_nodes.index(node_2)] = modified_node_2 - new_nodes[new_nodes.index(node_1)] = modified_node_1 - new_nodes[new_nodes.index(start)] = modified_node_0 - - del new_nodes[new_nodes.index(node_4)] - del new_nodes[new_nodes.index(node_5)] - del new_nodes[new_nodes.index(node_6)] - - compute_dominators(new_nodes) - return attr.evolve(flow_graph, nodes=new_nodes) + replace_node(flow_graph, node_1, new_node_1) # now it does + remove_node(flow_graph, node_4, node_7) + remove_node(flow_graph, node_5, node_7) + remove_node(flow_graph, node_6, node_7) # TODO: assert didn't execute anything? + return True def munge_unrolled_loops(flow_graph: FlowGraph) -> FlowGraph: - # TODO: This is horrible, probably not what I want. - # What if knocking out nodes 4, 5, 6 just reveals another - # set of nodes that look identical? We will incorrectly - # be merging two adjacent for-loops. + # TODO: What if knocking out nodes reveals another set of nodes + # that look identical? We will incorrectly be merging two + # adjacent for-loops. changed: bool = True while changed: changed = False for node in flow_graph.nodes: if not isinstance(node, ConditionalNode): continue - new_flow_graph = unroll_loop(flow_graph, node) - if new_flow_graph: - flow_graph = new_flow_graph - changed = True + changed = unroll_loop(flow_graph, node) + if changed: break return flow_graph diff --git a/src/translate.py b/src/translate.py index 8d541307..c3fd77c8 100644 --- a/src/translate.py +++ b/src/translate.py @@ -4,6 +4,7 @@ import traceback import typing from contextlib import contextmanager +from copy import copy from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple, Union import attr @@ -2096,7 +2097,7 @@ def regs_clobbered_until_dominator( if node.immediate_dominator is None: return set() seen = set([node.immediate_dominator]) - stack = node.parents[:] + stack = copy(node.parents) clobbered = set() while stack: n = stack.pop() @@ -2108,7 +2109,7 @@ def regs_clobbered_until_dominator( clobbered.update(output_regs_for_instr(instr, typemap)) if instr.mnemonic in CASES_FN_CALL: clobbered.update(TEMP_REGS) - stack.extend(n.parents) + stack.update(n.parents) return clobbered @@ -2118,7 +2119,7 @@ def reg_always_set( if node.immediate_dominator is None: return False seen = set([node.immediate_dominator]) - stack = node.parents[:] + stack = copy(node.parents) while stack: n = stack.pop() if n == node.immediate_dominator and not dom_set: @@ -2136,7 +2137,7 @@ def reg_always_set( if clobbered == True: return False if clobbered is None: - stack.extend(n.parents) + stack.update(n.parents) return True diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index 9d949587..404520f4 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -3,17 +3,23 @@ s32 test(void *arg0, s32 arg1) s32 temp_v0; void *phi_v1; s32 phi_v0; + s32 phi_return; - phi_v1 = arg0; - phi_v0 = 0; -loop_3: - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - if (arg1 != temp_v0) + phi_return = 0; + if (arg1 > 0) { - goto loop_3; + phi_v1 = arg0; + phi_v0 = 0; +loop_3: + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + phi_return = temp_v0; + if (arg1 != temp_v0) + { + goto loop_3; + } } - return temp_v0; + return phi_return; } diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index fa8cf8fb..fbcdca8c 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -6,29 +6,37 @@ s32 test(s32 arg0) s32 phi_v0; s32 phi_v1; s32 phi_v1_2; + s32 phi_v1_3; + s32 phi_v1_4; s32 phi_a2; phi_v0 = 0; phi_v1 = 0; - phi_v1_2 = 0; + phi_v1_3 = 0; if (arg0 > 0) { loop_1: - phi_a3 = 1; - phi_a2 = phi_v0 * 0; -loop_4: - temp_v1 = phi_v1_2 + phi_a2; - phi_a3 = phi_a3 + 1; - phi_v1_2 = temp_v1; - phi_a2 = phi_a2 + phi_v0; - if (arg0 != phi_a3) + phi_v1_2 = phi_v1_3; + if (arg0 > 0) { - goto loop_4; + phi_a3 = 1; + phi_v1_4 = phi_v1_3; + phi_a2 = phi_v0 * 0; +loop_4: + temp_v1 = phi_v1_4 + phi_a2; + phi_a3 = phi_a3 + 1; + phi_v1_2 = temp_v1; + phi_v1_4 = temp_v1; + phi_a2 = phi_a2 + phi_v0; + if (arg0 != phi_a3) + { + goto loop_4; + } } temp_v0 = phi_v0 + 1; phi_v0 = temp_v0; - phi_v1 = temp_v1; - phi_v1_2 = temp_v1; + phi_v1 = phi_v1_2; + phi_v1_3 = phi_v1_2; if (temp_v0 != arg0) { goto loop_1; diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index bb08a876..426e651b 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -1,24 +1,32 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) { s32 temp_v0; + s32 temp_v0_2; s32 temp_v1; s32 phi_v0; void *phi_a0; s32 phi_v1; + s32 phi_return; - phi_v0 = ((0x80150000 + ((arg0 >> 0x18) * 4))->unk258 + (arg0 & 0xFFFFFF)) + 0x80000000; - phi_a0 = (arg2 * 4) + &D_8015F668; - phi_v1 = 0; -loop_3: - *phi_a0 = phi_v0; - temp_v1 = phi_v1 + 1; - temp_v0 = phi_v0 + 0x10; - phi_v0 = temp_v0; - phi_a0 = phi_a0 + 4; - phi_v1 = temp_v1; - if (arg1 != temp_v1) + temp_v0 = ((0x80150000 + ((arg0 >> 0x18) * 4))->unk258 + (arg0 & 0xFFFFFF)) + 0x80000000; + phi_return = temp_v0; + if (arg1 != 0) { - goto loop_3; + phi_v0 = temp_v0; + phi_a0 = (arg2 * 4) + &D_8015F668; + phi_v1 = 0; +loop_3: + *phi_a0 = phi_v0; + temp_v1 = phi_v1 + 1; + temp_v0_2 = phi_v0 + 0x10; + phi_v0 = temp_v0_2; + phi_a0 = phi_a0 + 4; + phi_v1 = temp_v1; + phi_return = temp_v0_2; + if (arg1 != temp_v1) + { + goto loop_3; + } } - return temp_v0; + return phi_return; } From 2fe2c7d479ebf7e2ba10a766a227a3517e87b91a Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Thu, 30 Apr 2020 23:35:55 -0700 Subject: [PATCH 22/54] Re-enable do-while emission --- src/flow_graph.py | 7 ++ src/flow_graph_munge.py | 6 +- src/if_statements.py | 71 ++++++------------- tests/end_to_end/loop/irix-g-out.c | 2 +- tests/end_to_end/loop/irix-o2-out.c | 16 ++--- tests/end_to_end/loop_nested/irix-g-out.c | 2 +- tests/end_to_end/loop_nested/irix-o2-out.c | 16 ++--- tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 20 +++--- .../end_to_end/multiple-assigns/irix-g-out.c | 2 +- .../end_to_end/multiple-assigns/irix-o2-out.c | 38 +++++----- 10 files changed, 74 insertions(+), 106 deletions(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index b0c2b641..31783dcb 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -508,6 +508,10 @@ def process(item: Union[Instruction, Label]) -> None: return block_builder.get_blocks() +def is_self_loop_edge(node: "Node", edge: "Node") -> bool: + return edge.block.index == node.block.index + + def is_loop_edge(node: "Node", edge: "Node") -> bool: # Loops are represented by backwards jumps. return edge.block.index <= node.block.index @@ -589,6 +593,9 @@ def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: self.fallthrough_edge = with_this with_this.add_parent(self) + def is_self_loop(self) -> bool: + return is_self_loop_edge(self, self.conditional_edge) + def is_loop(self) -> bool: return is_loop_edge(self, self.conditional_edge) diff --git a/src/flow_graph_munge.py b/src/flow_graph_munge.py index 38e16765..01a6039b 100644 --- a/src/flow_graph_munge.py +++ b/src/flow_graph_munge.py @@ -1,8 +1,4 @@ -import typing -from copy import copy -from typing import List, Optional, Tuple - -import attr +from typing import List from .flow_graph import BasicNode, ConditionalNode, FlowGraph, Node, compute_dominators from .parse_instruction import Instruction diff --git a/src/if_statements.py b/src/if_statements.py index 397810d8..4bb5fefc 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Set, Tuple, Union +from typing import Dict, List, Optional, Set, Tuple, Union import attr @@ -20,14 +20,9 @@ Condition, Expression, FunctionInfo, -) -from .translate import Statement as TranslateStatement -from .translate import ( Type, simplify_condition, stringify_expr, - EvalOnceStmt, - EvalOnceExpr, ) @@ -124,12 +119,9 @@ def should_write(self) -> bool: def __str__(self) -> str: space = " " * self.indent - space_2 = " " * (self.indent + 4) - space_3 = " " * (self.indent + 8) - brace_after_if = f"\n{space}{{" if self.coding_style.newline_after_if else " {" brace_after_do = f"\n{space}{{" if self.coding_style.newline_after_if else " {" - cond = str(self.condition).rstrip(";") if self.condition else "" + cond = stringify_expr(self.condition).rstrip(";") if self.condition else "" body = f"\n".join(f"{stmt}" for stmt in self.body.statements) string_components = [ f"{space}do{brace_after_do}{body}", @@ -555,42 +547,24 @@ def add_return_statement( body.add_statement(SimpleStatement(indent, "return;")) -def pattern_match_against_simple_do_while_loop( +def pattern_match_simple_do_while_loop( context: Context, start: ConditionalNode, indent: int -) -> Optional[Tuple[Node, IfElseStatement, Node]]: - node_1 = start.fallthrough_edge - node_2 = start.conditional_edge - - if not ( - isinstance(node_1, ConditionalNode) - and node_1.is_loop() - # TODO: could also use id() here?: - and node_1.conditional_edge.block.index == node_1.block.index - and node_1.fallthrough_edge.block.index == node_2.block.index - ): +) -> Optional[DoWhileLoop]: + if not start.is_self_loop(): return None - assert isinstance(start.block.block_info, BlockInfo) - assert isinstance(node_1.block.block_info, BlockInfo) + assert start.block.block_info assert start.block.block_info.branch_condition - assert node_1.block.block_info.branch_condition loop_body = Body(False, []) - emit_node(context, node_1, loop_body, indent + 8) - + emit_node(context, start, loop_body, indent + 4) do_while = DoWhileLoop( - indent + 4, - context.options.coding_style, - loop_body, - node_1.block.block_info.branch_condition, - ) - should_loop = IfElseStatement( - start.block.block_info.branch_condition.negated(), indent, context.options.coding_style, - Body(False, [do_while]), + loop_body, + start.block.block_info.branch_condition, ) - return (start, should_loop, node_2) + return do_while def build_flowgraph_between( @@ -612,6 +586,18 @@ def build_flowgraph_between( while curr_start != end: # Write the current node (but return nodes are handled specially). if not isinstance(curr_start, ReturnNode): + # Before we do anything else, we pattern-match the subgraph + # rooted at curr_start against certain predefined subgraphs + # that emit do-while-loops: + if isinstance(curr_start, ConditionalNode): + do_while_loop = pattern_match_simple_do_while_loop( + context, curr_start, indent + ) + if do_while_loop: + body.add_do_while_loop(do_while_loop) + curr_start = curr_start.fallthrough_edge + continue + # If a node is ever encountered twice, we can emit a goto to the # first place we emitted it. Since nodes represent positions in the # assembly, and we use phi's for preserved variable contents, this @@ -671,19 +657,6 @@ def build_flowgraph_between( # In a BasicNode, the successor is the next articulation node. curr_start = curr_start.successor elif isinstance(curr_start, ConditionalNode): - # Before we do anything else, we pattern-match the subgraph - # rooted at curr_start against certain predefined subgraphs - # that emit do-while-loops: - do_while_loop = pattern_match_against_simple_do_while_loop( - context, curr_start, indent - ) - if do_while_loop: - (_, loop_if_statement, curr_end) = do_while_loop - # emit_node(context, curr_start, body, indent) - body.add_if_else(loop_if_statement) - curr_start = curr_end - continue - # A ConditionalNode means we need to find the next articulation # node. This means we need to find the "immediate postdominator" # of the current node, where "postdominator" means we have to go diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index b0fc2265..2413097f 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -11,6 +11,6 @@ void test(s32 arg0, s32 arg1) *(arg0 + sp4) = (u8)0; temp_t9 = sp4 + 1; sp4 = temp_t9; - } while (((temp_t9 < arg1) != 0)); + } while ((temp_t9 < arg1) != 0); } } diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index 404520f4..07a5a589 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -10,16 +10,14 @@ s32 test(void *arg0, s32 arg1) { phi_v1 = arg0; phi_v0 = 0; -loop_3: - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 = phi_v1 + 1; - phi_v0 = temp_v0; - phi_return = temp_v0; - if (arg1 != temp_v0) + do { - goto loop_3; - } + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 = phi_v1 + 1; + phi_v0 = temp_v0; + phi_return = temp_v0; + } while (arg1 != temp_v0); } return phi_return; } diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index b998c837..607821d9 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -16,7 +16,7 @@ s32 test(s32 arg0) { sp8 = sp8 + (spC * sp4); sp4 = sp4 + 1; - } while (((sp4 < arg0) != 0)); + } while ((sp4 < arg0) != 0); } spC = spC + 1; if (spC < arg0) diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index fbcdca8c..60b2f539 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -22,16 +22,14 @@ s32 test(s32 arg0) phi_a3 = 1; phi_v1_4 = phi_v1_3; phi_a2 = phi_v0 * 0; -loop_4: - temp_v1 = phi_v1_4 + phi_a2; - phi_a3 = phi_a3 + 1; - phi_v1_2 = temp_v1; - phi_v1_4 = temp_v1; - phi_a2 = phi_a2 + phi_v0; - if (arg0 != phi_a3) + do { - goto loop_4; - } + temp_v1 = phi_v1_4 + phi_a2; + phi_a3 = phi_a3 + 1; + phi_v1_2 = temp_v1; + phi_v1_4 = temp_v1; + phi_a2 = phi_a2 + phi_v0; + } while (arg0 != phi_a3); } temp_v0 = phi_v0 + 1; phi_v0 = temp_v0; diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index 426e651b..290807a6 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -15,18 +15,16 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) phi_v0 = temp_v0; phi_a0 = (arg2 * 4) + &D_8015F668; phi_v1 = 0; -loop_3: - *phi_a0 = phi_v0; - temp_v1 = phi_v1 + 1; - temp_v0_2 = phi_v0 + 0x10; - phi_v0 = temp_v0_2; - phi_a0 = phi_a0 + 4; - phi_v1 = temp_v1; - phi_return = temp_v0_2; - if (arg1 != temp_v1) + do { - goto loop_3; - } + *phi_a0 = phi_v0; + temp_v1 = phi_v1 + 1; + temp_v0_2 = phi_v0 + 0x10; + phi_v0 = temp_v0_2; + phi_a0 = phi_a0 + 4; + phi_v1 = temp_v1; + phi_return = temp_v0_2; + } while (arg1 != temp_v1); } return phi_return; } diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index b60561cd..3a2ef997 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -25,7 +25,7 @@ s32 test(s32 arg0) temp_a0 = temp_a0 + 1; D_410150 = sp4; phi_a0 = temp_a0; - } while ((temp_a0 == 5)); + } while (temp_a0 == 5); } return sp4; } diff --git a/tests/end_to_end/multiple-assigns/irix-o2-out.c b/tests/end_to_end/multiple-assigns/irix-o2-out.c index 4701b999..c8ba8323 100644 --- a/tests/end_to_end/multiple-assigns/irix-o2-out.c +++ b/tests/end_to_end/multiple-assigns/irix-o2-out.c @@ -8,27 +8,25 @@ s32 test(s32 arg0) if (arg0 == 5) { phi_a0 = arg0; -loop_2: - D_410120 = phi_a0; - temp_a0 = phi_a0 + 1; - D_410120 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410120 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410120 = temp_a0; - temp_v1 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410120 = temp_a0; - D_410120 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410120 = temp_a0; - temp_a0 = temp_a0 + 1; - D_410120 = temp_v1; - phi_a0 = temp_a0; - if (temp_a0 == 5) + do { - goto loop_2; - } + D_410120 = phi_a0; + temp_a0 = phi_a0 + 1; + D_410120 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410120 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410120 = temp_a0; + temp_v1 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410120 = temp_a0; + D_410120 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410120 = temp_a0; + temp_a0 = temp_a0 + 1; + D_410120 = temp_v1; + phi_a0 = temp_a0; + } while (temp_a0 == 5); sp4 = temp_v1; } return sp4; From 24e6066d431a27e61b46c935aca88e7a6eb46080 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Fri, 1 May 2020 14:22:09 -0700 Subject: [PATCH 23/54] Temporary while commit --- src/flow_graph.py | 6 ++++++ src/if_statements.py | 3 +++ src/translate.py | 1 - 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index 31783dcb..f589e00f 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -550,6 +550,12 @@ def replace_parent(self, replace_this: "Node", with_this: "Node") -> None: self.parents.remove(replace_this) self.parents.add(with_this) + def is_loop(self) -> bool: + return False # overridden by some child classes + + def loop_edges(self) -> List["Node"]: + return list(filter(lambda n: n.is_loop(), self.parents)) + @abc.abstractmethod def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: ... diff --git a/src/if_statements.py b/src/if_statements.py index 4bb5fefc..5549b1d7 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -590,6 +590,9 @@ def build_flowgraph_between( # rooted at curr_start against certain predefined subgraphs # that emit do-while-loops: if isinstance(curr_start, ConditionalNode): + loops = curr_start.loop_edges() + for loop in loops: + postdominated do_while_loop = pattern_match_simple_do_while_loop( context, curr_start, indent ) diff --git a/src/translate.py b/src/translate.py index c3fd77c8..6855d683 100644 --- a/src/translate.py +++ b/src/translate.py @@ -23,7 +23,6 @@ Node, ReturnNode, SwitchNode, - build_flowgraph, ) from .options import Options from .parse_file import Rodata From 35178602be0054c2df889a8305b986dc53569b78 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Fri, 1 May 2020 14:22:09 -0700 Subject: [PATCH 24/54] Temporary while commit --- src/if_statements.py | 53 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 4 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 5549b1d7..72bc554b 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -122,9 +122,9 @@ def __str__(self) -> str: brace_after_do = f"\n{space}{{" if self.coding_style.newline_after_if else " {" cond = stringify_expr(self.condition).rstrip(";") if self.condition else "" - body = f"\n".join(f"{stmt}" for stmt in self.body.statements) + body = str(self.body) string_components = [ - f"{space}do{brace_after_do}{body}", + f"{space}do{brace_after_do}\n{body}", f"{space}}} while ({cond});", ] return "\n".join(string_components) @@ -223,6 +223,7 @@ def build_conditional_subgraph( given that "start" is a ConditionalNode; this program will intelligently output if/else relationships. """ + print(f"build_conditional_subgraph({start.block.index}, {end.block.index})") if_block_info = start.block.block_info assert isinstance(if_block_info, BlockInfo) assert if_block_info.branch_condition is not None @@ -558,13 +559,35 @@ def pattern_match_simple_do_while_loop( loop_body = Body(False, []) emit_node(context, start, loop_body, indent + 4) - do_while = DoWhileLoop( + return DoWhileLoop( indent, context.options.coding_style, loop_body, start.block.block_info.branch_condition, ) - return do_while + + +def get_do_while_loop_between( + context: Context, start: ConditionalNode, end: ConditionalNode, indent: int +) -> DoWhileLoop: + assert end.block.block_info + assert end.block.block_info.branch_condition + + # TODO: fallthrough_edge needs to be the right thing here + # the real detection is conditional has a reverse arrow... + # (a self-arrow IS a reverse arrow, so we can even consolidate with the + # above function) + loop_body = build_flowgraph_between( + context, start.fallthrough_edge, end, indent + 4 + ) + emit_node(context, end, loop_body, indent + 4) + + return DoWhileLoop( + indent, + context.options.coding_style, + loop_body, + end.block.block_info.branch_condition, # TODO: negated? + ) def build_flowgraph_between( @@ -578,12 +601,16 @@ def build_flowgraph_between( """ curr_start = start body = Body(print_node_comment=context.options.debug) + print(f"build_flowgraph_between({start.block.index}, {end.block.index})") # We will split this graph into subgraphs, where the entrance and exit nodes # of that subgraph are at the same indentation level. "curr_start" will # iterate through these nodes, which are commonly referred to as # articulation nodes. while curr_start != end: + print(f"...curr_start={curr_start.block.index}") + if curr_start.block.index == 4: + breakpoint() # Write the current node (but return nodes are handled specially). if not isinstance(curr_start, ReturnNode): # Before we do anything else, we pattern-match the subgraph @@ -609,6 +636,7 @@ def build_flowgraph_between( # hints at that situation better than if we just blindly duplicate # the block. if curr_start in context.emitted_nodes: + breakpoint() emit_goto(context, curr_start, body, indent) break context.emitted_nodes.add(curr_start) @@ -660,6 +688,23 @@ def build_flowgraph_between( # In a BasicNode, the successor is the next articulation node. curr_start = curr_start.successor elif isinstance(curr_start, ConditionalNode): + # Once again, before anything else, we pattern match against "big" + # do-while loops. + loops = curr_start.loop_edges() + loops = list( + filter( + lambda n: isinstance(n, ConditionalNode) and not n.is_self_loop(), + loops, + ) + ) + if loops: + curr_end = sorted(loops, key=lambda n: n.block.index, reverse=True)[0] + body.add_do_while_loop( + get_do_while_loop_between(context, curr_start, curr_end, indent) + ) + curr_start = curr_end.fallthrough_edge + continue + # A ConditionalNode means we need to find the next articulation # node. This means we need to find the "immediate postdominator" # of the current node, where "postdominator" means we have to go From 57f6697b7f930dca34024ca624b6a629945b5b60 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sat, 2 May 2020 20:56:04 -0700 Subject: [PATCH 25/54] Throw this commit away --- src/if_statements.py | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 72bc554b..5e985b6d 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -223,7 +223,7 @@ def build_conditional_subgraph( given that "start" is a ConditionalNode; this program will intelligently output if/else relationships. """ - print(f"build_conditional_subgraph({start.block.index}, {end.block.index})") + # print(f"build_conditional_subgraph({start.block.index}, {end.block.index})") if_block_info = start.block.block_info assert isinstance(if_block_info, BlockInfo) assert if_block_info.branch_condition is not None @@ -601,25 +601,23 @@ def build_flowgraph_between( """ curr_start = start body = Body(print_node_comment=context.options.debug) - print(f"build_flowgraph_between({start.block.index}, {end.block.index})") + # print(f"build_flowgraph_between({start.block.index}, {end.block.index})") # We will split this graph into subgraphs, where the entrance and exit nodes # of that subgraph are at the same indentation level. "curr_start" will # iterate through these nodes, which are commonly referred to as # articulation nodes. while curr_start != end: - print(f"...curr_start={curr_start.block.index}") - if curr_start.block.index == 4: - breakpoint() + # print(f"...curr_start={curr_start.block.index}") # Write the current node (but return nodes are handled specially). if not isinstance(curr_start, ReturnNode): # Before we do anything else, we pattern-match the subgraph # rooted at curr_start against certain predefined subgraphs # that emit do-while-loops: if isinstance(curr_start, ConditionalNode): - loops = curr_start.loop_edges() - for loop in loops: - postdominated + # loops = curr_start.loop_edges() + # for loop in loops: + # postdominated do_while_loop = pattern_match_simple_do_while_loop( context, curr_start, indent ) @@ -636,7 +634,6 @@ def build_flowgraph_between( # hints at that situation better than if we just blindly duplicate # the block. if curr_start in context.emitted_nodes: - breakpoint() emit_goto(context, curr_start, body, indent) break context.emitted_nodes.add(curr_start) @@ -690,20 +687,20 @@ def build_flowgraph_between( elif isinstance(curr_start, ConditionalNode): # Once again, before anything else, we pattern match against "big" # do-while loops. - loops = curr_start.loop_edges() - loops = list( - filter( - lambda n: isinstance(n, ConditionalNode) and not n.is_self_loop(), - loops, - ) - ) - if loops: - curr_end = sorted(loops, key=lambda n: n.block.index, reverse=True)[0] - body.add_do_while_loop( - get_do_while_loop_between(context, curr_start, curr_end, indent) - ) - curr_start = curr_end.fallthrough_edge - continue + # loops = curr_start.loop_edges() + # loops = list( + # filter( + # lambda n: isinstance(n, ConditionalNode) and not n.is_self_loop(), + # loops, + # ) + # ) + # if loops: + # curr_end = sorted(loops, key=lambda n: n.block.index, reverse=True)[0] + # body.add_do_while_loop( + # get_do_while_loop_between(context, curr_start, curr_end, indent) + # ) + # curr_start = curr_end.fallthrough_edge + # continue # A ConditionalNode means we need to find the next articulation # node. This means we need to find the "immediate postdominator" From f4aa9e68f85e3798ed12f9bba1da99bb5b0a4b1b Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sat, 2 May 2020 20:58:11 -0700 Subject: [PATCH 26/54] Add a broken test, to be deleted --- tests/end_to_end/loop_optimizations/irix-g.s | 74 ++++++++++ tests/end_to_end/loop_optimizations/irix-o2.s | 139 ++++++++++++++++++ tests/end_to_end/loop_optimizations/orig.c | 33 +++++ 3 files changed, 246 insertions(+) create mode 100644 tests/end_to_end/loop_optimizations/irix-g.s create mode 100644 tests/end_to_end/loop_optimizations/irix-o2.s create mode 100644 tests/end_to_end/loop_optimizations/orig.c diff --git a/tests/end_to_end/loop_optimizations/irix-g.s b/tests/end_to_end/loop_optimizations/irix-g.s new file mode 100644 index 00000000..faafbef6 --- /dev/null +++ b/tests/end_to_end/loop_optimizations/irix-g.s @@ -0,0 +1,74 @@ +.set noat # allow manual use of $at +.set noreorder # don't insert nops after branches + + +glabel test +/* 000090 00400090 27BDFFF0 */ addiu $sp, $sp, -0x10 +/* 000094 00400094 18A00009 */ blez $a1, .L004000BC +/* 000098 00400098 AFA0000C */ sw $zero, 0xc($sp) +.L0040009C: +/* 00009C 0040009C 8FAE000C */ lw $t6, 0xc($sp) +/* 0000A0 004000A0 008E7821 */ addu $t7, $a0, $t6 +/* 0000A4 004000A4 A1E00000 */ sb $zero, ($t7) +/* 0000A8 004000A8 8FB8000C */ lw $t8, 0xc($sp) +/* 0000AC 004000AC 27190001 */ addiu $t9, $t8, 1 +/* 0000B0 004000B0 0325082A */ slt $at, $t9, $a1 +/* 0000B4 004000B4 1420FFF9 */ bnez $at, .L0040009C +/* 0000B8 004000B8 AFB9000C */ sw $t9, 0xc($sp) +.L004000BC: +/* 0000BC 004000BC AFA00008 */ sw $zero, 8($sp) +/* 0000C0 004000C0 18A00009 */ blez $a1, .L004000E8 +/* 0000C4 004000C4 AFA0000C */ sw $zero, 0xc($sp) +.L004000C8: +/* 0000C8 004000C8 8FA80008 */ lw $t0, 8($sp) +/* 0000CC 004000CC 25090001 */ addiu $t1, $t0, 1 +/* 0000D0 004000D0 AFA90008 */ sw $t1, 8($sp) +/* 0000D4 004000D4 8FAA000C */ lw $t2, 0xc($sp) +/* 0000D8 004000D8 254B0001 */ addiu $t3, $t2, 1 +/* 0000DC 004000DC 0165082A */ slt $at, $t3, $a1 +/* 0000E0 004000E0 1420FFF9 */ bnez $at, .L004000C8 +/* 0000E4 004000E4 AFAB000C */ sw $t3, 0xc($sp) +.L004000E8: +/* 0000E8 004000E8 240C0001 */ addiu $t4, $zero, 1 +/* 0000EC 004000EC AFAC0004 */ sw $t4, 4($sp) +/* 0000F0 004000F0 240D0001 */ addiu $t5, $zero, 1 +/* 0000F4 004000F4 28A10002 */ slti $at, $a1, 2 +/* 0000F8 004000F8 1420000C */ bnez $at, .L0040012C +/* 0000FC 004000FC AFAD000C */ sw $t5, 0xc($sp) +.L00400100: +/* 000100 00400100 8FAE0004 */ lw $t6, 4($sp) +/* 000104 00400104 8FAF000C */ lw $t7, 0xc($sp) +/* 000108 00400108 01CF0019 */ multu $t6, $t7 +/* 00010C 0040010C 0000C012 */ mflo $t8 +/* 000110 00400110 AFB80004 */ sw $t8, 4($sp) +/* 000114 00400114 00000000 */ nop +/* 000118 00400118 8FB9000C */ lw $t9, 0xc($sp) +/* 00011C 0040011C 27280001 */ addiu $t0, $t9, 1 +/* 000120 00400120 0105082A */ slt $at, $t0, $a1 +/* 000124 00400124 1420FFF6 */ bnez $at, .L00400100 +/* 000128 00400128 AFA8000C */ sw $t0, 0xc($sp) +.L0040012C: +/* 00012C 0040012C 18A00010 */ blez $a1, .L00400170 +/* 000130 00400130 AFA0000C */ sw $zero, 0xc($sp) +.L00400134: +/* 000134 00400134 8FA90000 */ lw $t1, ($sp) +/* 000138 00400138 8FAA000C */ lw $t2, 0xc($sp) +/* 00013C 0040013C 012A5821 */ addu $t3, $t1, $t2 +/* 000140 00400140 AFAB0000 */ sw $t3, ($sp) +/* 000144 00400144 8FAC0000 */ lw $t4, ($sp) +/* 000148 00400148 8FAD000C */ lw $t5, 0xc($sp) +/* 00014C 0040014C 018D0019 */ multu $t4, $t5 +/* 000150 00400150 00007012 */ mflo $t6 +/* 000154 00400154 AFAE0000 */ sw $t6, ($sp) +/* 000158 00400158 00000000 */ nop +/* 00015C 0040015C 8FAF000C */ lw $t7, 0xc($sp) +/* 000160 00400160 25F80001 */ addiu $t8, $t7, 1 +/* 000164 00400164 0305082A */ slt $at, $t8, $a1 +/* 000168 00400168 1420FFF2 */ bnez $at, .L00400134 +/* 00016C 0040016C AFB8000C */ sw $t8, 0xc($sp) +.L00400170: +/* 000170 00400170 10000001 */ b .L00400178 +/* 000174 00400174 00000000 */ nop +.L00400178: +/* 000178 00400178 03E00008 */ jr $ra +/* 00017C 0040017C 27BD0010 */ addiu $sp, $sp, 0x10 diff --git a/tests/end_to_end/loop_optimizations/irix-o2.s b/tests/end_to_end/loop_optimizations/irix-o2.s new file mode 100644 index 00000000..3bf77a9d --- /dev/null +++ b/tests/end_to_end/loop_optimizations/irix-o2.s @@ -0,0 +1,139 @@ +.set noat # allow manual use of $at +.set noreorder # don't insert nops after branches + + +glabel test +/* 000090 00400090 27BDFFF0 */ addiu $sp, $sp, -0x10 +/* 000094 00400094 18A00015 */ blez $a1, .L004000EC +/* 000098 00400098 00001025 */ move $v0, $zero +/* 00009C 0040009C 30A80003 */ andi $t0, $a1, 3 +/* 0000A0 004000A0 11000009 */ beqz $t0, .L004000C8 +/* 0000A4 004000A4 01003825 */ move $a3, $t0 +/* 0000A8 004000A8 00801821 */ move $v1, $a0 +/* 0000AC 004000AC 24060001 */ addiu $a2, $zero, 1 +.L004000B0: +/* 0000B0 004000B0 00C01025 */ move $v0, $a2 +/* 0000B4 004000B4 A0600000 */ sb $zero, ($v1) +/* 0000B8 004000B8 24630001 */ addiu $v1, $v1, 1 +/* 0000BC 004000BC 14E6FFFC */ bne $a3, $a2, .L004000B0 +/* 0000C0 004000C0 24C60001 */ addiu $a2, $a2, 1 +/* 0000C4 004000C4 10450008 */ beq $v0, $a1, .L004000E8 +.L004000C8: +/* 0000C8 004000C8 00821821 */ addu $v1, $a0, $v0 +.L004000CC: +/* 0000CC 004000CC 24420004 */ addiu $v0, $v0, 4 +/* 0000D0 004000D0 A0600001 */ sb $zero, 1($v1) +/* 0000D4 004000D4 A0600002 */ sb $zero, 2($v1) +/* 0000D8 004000D8 A0600003 */ sb $zero, 3($v1) +/* 0000DC 004000DC 24630004 */ addiu $v1, $v1, 4 +/* 0000E0 004000E0 1445FFFA */ bne $v0, $a1, .L004000CC +/* 0000E4 004000E4 A060FFFC */ sb $zero, -4($v1) +.L004000E8: +/* 0000E8 004000E8 00001025 */ move $v0, $zero +.L004000EC: +/* 0000EC 004000EC 18A0000C */ blez $a1, .L00400120 +/* 0000F0 004000F0 28A10002 */ slti $at, $a1, 2 +/* 0000F4 004000F4 30A80003 */ andi $t0, $a1, 3 +/* 0000F8 004000F8 11000006 */ beqz $t0, .L00400114 +/* 0000FC 004000FC 01001825 */ move $v1, $t0 +/* 000100 00400100 24460001 */ addiu $a2, $v0, 1 +.L00400104: +/* 000104 00400104 00C01025 */ move $v0, $a2 +/* 000108 00400108 1466FFFE */ bne $v1, $a2, .L00400104 +/* 00010C 0040010C 24C60001 */ addiu $a2, $a2, 1 +/* 000110 00400110 10450003 */ beq $v0, $a1, .L00400120 +.L00400114: +/* 000114 00400114 24420004 */ addiu $v0, $v0, 4 +.L00400118: +/* 000118 00400118 5445FFFF */ bnel $v0, $a1, .L00400118 +/* 00011C 0040011C 24420004 */ addiu $v0, $v0, 4 +.L00400120: +/* 000120 00400120 24030001 */ addiu $v1, $zero, 1 +/* 000124 00400124 14200023 */ bnez $at, .L004001B4 +/* 000128 00400128 24020001 */ addiu $v0, $zero, 1 +/* 00012C 0040012C 24A7FFFF */ addiu $a3, $a1, -1 +/* 000130 00400130 30EE0003 */ andi $t6, $a3, 3 +/* 000134 00400134 11C00008 */ beqz $t6, .L00400158 +/* 000138 00400138 25C40001 */ addiu $a0, $t6, 1 +/* 00013C 0040013C 24460001 */ addiu $a2, $v0, 1 +.L00400140: +/* 000140 00400140 00620019 */ multu $v1, $v0 +/* 000144 00400144 00C01025 */ move $v0, $a2 +/* 000148 00400148 00001812 */ mflo $v1 +/* 00014C 0040014C 1486FFFC */ bne $a0, $a2, .L00400140 +/* 000150 00400150 24C60001 */ addiu $a2, $a2, 1 +/* 000154 00400154 10450017 */ beq $v0, $a1, .L004001B4 +.L00400158: +/* 000158 00400158 24460001 */ addiu $a2, $v0, 1 +/* 00015C 0040015C 24440002 */ addiu $a0, $v0, 2 +/* 000160 00400160 24470003 */ addiu $a3, $v0, 3 +.L00400164: +/* 000164 00400164 00620019 */ multu $v1, $v0 +/* 000168 00400168 24420004 */ addiu $v0, $v0, 4 +/* 00016C 0040016C 00001812 */ mflo $v1 +/* 000170 00400170 00000000 */ nop +/* 000174 00400174 00000000 */ nop +/* 000178 00400178 00660019 */ multu $v1, $a2 +/* 00017C 0040017C 24C60004 */ addiu $a2, $a2, 4 +/* 000180 00400180 00001812 */ mflo $v1 +/* 000184 00400184 00000000 */ nop +/* 000188 00400188 00000000 */ nop +/* 00018C 0040018C 00640019 */ multu $v1, $a0 +/* 000190 00400190 24840004 */ addiu $a0, $a0, 4 +/* 000194 00400194 00001812 */ mflo $v1 +/* 000198 00400198 00000000 */ nop +/* 00019C 0040019C 00000000 */ nop +/* 0001A0 004001A0 00670019 */ multu $v1, $a3 +/* 0001A4 004001A4 24E70004 */ addiu $a3, $a3, 4 +/* 0001A8 004001A8 00001812 */ mflo $v1 +/* 0001AC 004001AC 1445FFED */ bne $v0, $a1, .L00400164 +/* 0001B0 004001B0 00000000 */ nop +.L004001B4: +/* 0001B4 004001B4 18A00027 */ blez $a1, .L00400254 +/* 0001B8 004001B8 00001025 */ move $v0, $zero +/* 0001BC 004001BC 30A80003 */ andi $t0, $a1, 3 +/* 0001C0 004001C0 1100000B */ beqz $t0, .L004001F0 +/* 0001C4 004001C4 01002025 */ move $a0, $t0 +/* 0001C8 004001C8 24060001 */ addiu $a2, $zero, 1 +/* 0001CC 004001CC 8FA30000 */ lw $v1, ($sp) +.L004001D0: +/* 0001D0 004001D0 00621821 */ addu $v1, $v1, $v0 +/* 0001D4 004001D4 00620019 */ multu $v1, $v0 +/* 0001D8 004001D8 00C01025 */ move $v0, $a2 +/* 0001DC 004001DC 00001812 */ mflo $v1 +/* 0001E0 004001E0 1486FFFB */ bne $a0, $a2, .L004001D0 +/* 0001E4 004001E4 24C60001 */ addiu $a2, $a2, 1 +/* 0001E8 004001E8 1045001A */ beq $v0, $a1, .L00400254 +/* 0001EC 004001EC AFA30000 */ sw $v1, ($sp) +.L004001F0: +/* 0001F0 004001F0 8FA30000 */ lw $v1, ($sp) +/* 0001F4 004001F4 24460001 */ addiu $a2, $v0, 1 +/* 0001F8 004001F8 24440002 */ addiu $a0, $v0, 2 +/* 0001FC 004001FC 24470003 */ addiu $a3, $v0, 3 +/* 000200 00400200 00621821 */ addu $v1, $v1, $v0 +.L00400204: +/* 000204 00400204 00620019 */ multu $v1, $v0 +/* 000208 00400208 00001812 */ mflo $v1 +/* 00020C 0040020C 00621821 */ addu $v1, $v1, $v0 +/* 000210 00400210 24630001 */ addiu $v1, $v1, 1 +/* 000214 00400214 00660019 */ multu $v1, $a2 +/* 000218 00400218 24C60004 */ addiu $a2, $a2, 4 +/* 00021C 0040021C 00001812 */ mflo $v1 +/* 000220 00400220 00621821 */ addu $v1, $v1, $v0 +/* 000224 00400224 24630002 */ addiu $v1, $v1, 2 +/* 000228 00400228 00640019 */ multu $v1, $a0 +/* 00022C 0040022C 24840004 */ addiu $a0, $a0, 4 +/* 000230 00400230 00001812 */ mflo $v1 +/* 000234 00400234 00621821 */ addu $v1, $v1, $v0 +/* 000238 00400238 24630003 */ addiu $v1, $v1, 3 +/* 00023C 0040023C 00670019 */ multu $v1, $a3 +/* 000240 00400240 24420004 */ addiu $v0, $v0, 4 +/* 000244 00400244 24E70004 */ addiu $a3, $a3, 4 +/* 000248 00400248 00001812 */ mflo $v1 +/* 00024C 0040024C 5445FFED */ bnel $v0, $a1, .L00400204 +/* 000250 00400250 00621821 */ addu $v1, $v1, $v0 +.L00400254: +/* 000254 00400254 03E00008 */ jr $ra +/* 000258 00400258 27BD0010 */ addiu $sp, $sp, 0x10 + +/* 00025C 0040025C 00000000 */ nop diff --git a/tests/end_to_end/loop_optimizations/orig.c b/tests/end_to_end/loop_optimizations/orig.c new file mode 100644 index 00000000..737219ea --- /dev/null +++ b/tests/end_to_end/loop_optimizations/orig.c @@ -0,0 +1,33 @@ +void test(char *foo, int length) +{ + int i; + int total1, total2, total3; + + // array zeroing + for (i = 0; i < length; i++) + { + foo[i] = 0; + } + + // simple addition + total1 = 0; + for (i = 0; i < length; i++) + { + total1 += 1; + } + + // simple multiplication + total2 = 1; + for (i = 1; i < length; i++) + { + total2 *= i; + } + + // addition and multiplication + total3 = 0; + for (i = 0; i < length; i++) + { + total3 += i; + total3 *= i; + } +} From 2768028f85d21b32b98e3d5e05991c0dc43e653b Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sat, 2 May 2020 20:59:47 -0700 Subject: [PATCH 27/54] Undo last several commits for a fresh start --- src/flow_graph.py | 6 - src/if_statements.py | 53 +------ src/translate.py | 1 + tests/end_to_end/loop_optimizations/irix-g.s | 74 ---------- tests/end_to_end/loop_optimizations/irix-o2.s | 139 ------------------ tests/end_to_end/loop_optimizations/orig.c | 33 ----- 6 files changed, 5 insertions(+), 301 deletions(-) delete mode 100644 tests/end_to_end/loop_optimizations/irix-g.s delete mode 100644 tests/end_to_end/loop_optimizations/irix-o2.s delete mode 100644 tests/end_to_end/loop_optimizations/orig.c diff --git a/src/flow_graph.py b/src/flow_graph.py index f589e00f..31783dcb 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -550,12 +550,6 @@ def replace_parent(self, replace_this: "Node", with_this: "Node") -> None: self.parents.remove(replace_this) self.parents.add(with_this) - def is_loop(self) -> bool: - return False # overridden by some child classes - - def loop_edges(self) -> List["Node"]: - return list(filter(lambda n: n.is_loop(), self.parents)) - @abc.abstractmethod def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: ... diff --git a/src/if_statements.py b/src/if_statements.py index 5e985b6d..4bb5fefc 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -122,9 +122,9 @@ def __str__(self) -> str: brace_after_do = f"\n{space}{{" if self.coding_style.newline_after_if else " {" cond = stringify_expr(self.condition).rstrip(";") if self.condition else "" - body = str(self.body) + body = f"\n".join(f"{stmt}" for stmt in self.body.statements) string_components = [ - f"{space}do{brace_after_do}\n{body}", + f"{space}do{brace_after_do}{body}", f"{space}}} while ({cond});", ] return "\n".join(string_components) @@ -223,7 +223,6 @@ def build_conditional_subgraph( given that "start" is a ConditionalNode; this program will intelligently output if/else relationships. """ - # print(f"build_conditional_subgraph({start.block.index}, {end.block.index})") if_block_info = start.block.block_info assert isinstance(if_block_info, BlockInfo) assert if_block_info.branch_condition is not None @@ -559,35 +558,13 @@ def pattern_match_simple_do_while_loop( loop_body = Body(False, []) emit_node(context, start, loop_body, indent + 4) - return DoWhileLoop( + do_while = DoWhileLoop( indent, context.options.coding_style, loop_body, start.block.block_info.branch_condition, ) - - -def get_do_while_loop_between( - context: Context, start: ConditionalNode, end: ConditionalNode, indent: int -) -> DoWhileLoop: - assert end.block.block_info - assert end.block.block_info.branch_condition - - # TODO: fallthrough_edge needs to be the right thing here - # the real detection is conditional has a reverse arrow... - # (a self-arrow IS a reverse arrow, so we can even consolidate with the - # above function) - loop_body = build_flowgraph_between( - context, start.fallthrough_edge, end, indent + 4 - ) - emit_node(context, end, loop_body, indent + 4) - - return DoWhileLoop( - indent, - context.options.coding_style, - loop_body, - end.block.block_info.branch_condition, # TODO: negated? - ) + return do_while def build_flowgraph_between( @@ -601,23 +578,18 @@ def build_flowgraph_between( """ curr_start = start body = Body(print_node_comment=context.options.debug) - # print(f"build_flowgraph_between({start.block.index}, {end.block.index})") # We will split this graph into subgraphs, where the entrance and exit nodes # of that subgraph are at the same indentation level. "curr_start" will # iterate through these nodes, which are commonly referred to as # articulation nodes. while curr_start != end: - # print(f"...curr_start={curr_start.block.index}") # Write the current node (but return nodes are handled specially). if not isinstance(curr_start, ReturnNode): # Before we do anything else, we pattern-match the subgraph # rooted at curr_start against certain predefined subgraphs # that emit do-while-loops: if isinstance(curr_start, ConditionalNode): - # loops = curr_start.loop_edges() - # for loop in loops: - # postdominated do_while_loop = pattern_match_simple_do_while_loop( context, curr_start, indent ) @@ -685,23 +657,6 @@ def build_flowgraph_between( # In a BasicNode, the successor is the next articulation node. curr_start = curr_start.successor elif isinstance(curr_start, ConditionalNode): - # Once again, before anything else, we pattern match against "big" - # do-while loops. - # loops = curr_start.loop_edges() - # loops = list( - # filter( - # lambda n: isinstance(n, ConditionalNode) and not n.is_self_loop(), - # loops, - # ) - # ) - # if loops: - # curr_end = sorted(loops, key=lambda n: n.block.index, reverse=True)[0] - # body.add_do_while_loop( - # get_do_while_loop_between(context, curr_start, curr_end, indent) - # ) - # curr_start = curr_end.fallthrough_edge - # continue - # A ConditionalNode means we need to find the next articulation # node. This means we need to find the "immediate postdominator" # of the current node, where "postdominator" means we have to go diff --git a/src/translate.py b/src/translate.py index 6855d683..c3fd77c8 100644 --- a/src/translate.py +++ b/src/translate.py @@ -23,6 +23,7 @@ Node, ReturnNode, SwitchNode, + build_flowgraph, ) from .options import Options from .parse_file import Rodata diff --git a/tests/end_to_end/loop_optimizations/irix-g.s b/tests/end_to_end/loop_optimizations/irix-g.s deleted file mode 100644 index faafbef6..00000000 --- a/tests/end_to_end/loop_optimizations/irix-g.s +++ /dev/null @@ -1,74 +0,0 @@ -.set noat # allow manual use of $at -.set noreorder # don't insert nops after branches - - -glabel test -/* 000090 00400090 27BDFFF0 */ addiu $sp, $sp, -0x10 -/* 000094 00400094 18A00009 */ blez $a1, .L004000BC -/* 000098 00400098 AFA0000C */ sw $zero, 0xc($sp) -.L0040009C: -/* 00009C 0040009C 8FAE000C */ lw $t6, 0xc($sp) -/* 0000A0 004000A0 008E7821 */ addu $t7, $a0, $t6 -/* 0000A4 004000A4 A1E00000 */ sb $zero, ($t7) -/* 0000A8 004000A8 8FB8000C */ lw $t8, 0xc($sp) -/* 0000AC 004000AC 27190001 */ addiu $t9, $t8, 1 -/* 0000B0 004000B0 0325082A */ slt $at, $t9, $a1 -/* 0000B4 004000B4 1420FFF9 */ bnez $at, .L0040009C -/* 0000B8 004000B8 AFB9000C */ sw $t9, 0xc($sp) -.L004000BC: -/* 0000BC 004000BC AFA00008 */ sw $zero, 8($sp) -/* 0000C0 004000C0 18A00009 */ blez $a1, .L004000E8 -/* 0000C4 004000C4 AFA0000C */ sw $zero, 0xc($sp) -.L004000C8: -/* 0000C8 004000C8 8FA80008 */ lw $t0, 8($sp) -/* 0000CC 004000CC 25090001 */ addiu $t1, $t0, 1 -/* 0000D0 004000D0 AFA90008 */ sw $t1, 8($sp) -/* 0000D4 004000D4 8FAA000C */ lw $t2, 0xc($sp) -/* 0000D8 004000D8 254B0001 */ addiu $t3, $t2, 1 -/* 0000DC 004000DC 0165082A */ slt $at, $t3, $a1 -/* 0000E0 004000E0 1420FFF9 */ bnez $at, .L004000C8 -/* 0000E4 004000E4 AFAB000C */ sw $t3, 0xc($sp) -.L004000E8: -/* 0000E8 004000E8 240C0001 */ addiu $t4, $zero, 1 -/* 0000EC 004000EC AFAC0004 */ sw $t4, 4($sp) -/* 0000F0 004000F0 240D0001 */ addiu $t5, $zero, 1 -/* 0000F4 004000F4 28A10002 */ slti $at, $a1, 2 -/* 0000F8 004000F8 1420000C */ bnez $at, .L0040012C -/* 0000FC 004000FC AFAD000C */ sw $t5, 0xc($sp) -.L00400100: -/* 000100 00400100 8FAE0004 */ lw $t6, 4($sp) -/* 000104 00400104 8FAF000C */ lw $t7, 0xc($sp) -/* 000108 00400108 01CF0019 */ multu $t6, $t7 -/* 00010C 0040010C 0000C012 */ mflo $t8 -/* 000110 00400110 AFB80004 */ sw $t8, 4($sp) -/* 000114 00400114 00000000 */ nop -/* 000118 00400118 8FB9000C */ lw $t9, 0xc($sp) -/* 00011C 0040011C 27280001 */ addiu $t0, $t9, 1 -/* 000120 00400120 0105082A */ slt $at, $t0, $a1 -/* 000124 00400124 1420FFF6 */ bnez $at, .L00400100 -/* 000128 00400128 AFA8000C */ sw $t0, 0xc($sp) -.L0040012C: -/* 00012C 0040012C 18A00010 */ blez $a1, .L00400170 -/* 000130 00400130 AFA0000C */ sw $zero, 0xc($sp) -.L00400134: -/* 000134 00400134 8FA90000 */ lw $t1, ($sp) -/* 000138 00400138 8FAA000C */ lw $t2, 0xc($sp) -/* 00013C 0040013C 012A5821 */ addu $t3, $t1, $t2 -/* 000140 00400140 AFAB0000 */ sw $t3, ($sp) -/* 000144 00400144 8FAC0000 */ lw $t4, ($sp) -/* 000148 00400148 8FAD000C */ lw $t5, 0xc($sp) -/* 00014C 0040014C 018D0019 */ multu $t4, $t5 -/* 000150 00400150 00007012 */ mflo $t6 -/* 000154 00400154 AFAE0000 */ sw $t6, ($sp) -/* 000158 00400158 00000000 */ nop -/* 00015C 0040015C 8FAF000C */ lw $t7, 0xc($sp) -/* 000160 00400160 25F80001 */ addiu $t8, $t7, 1 -/* 000164 00400164 0305082A */ slt $at, $t8, $a1 -/* 000168 00400168 1420FFF2 */ bnez $at, .L00400134 -/* 00016C 0040016C AFB8000C */ sw $t8, 0xc($sp) -.L00400170: -/* 000170 00400170 10000001 */ b .L00400178 -/* 000174 00400174 00000000 */ nop -.L00400178: -/* 000178 00400178 03E00008 */ jr $ra -/* 00017C 0040017C 27BD0010 */ addiu $sp, $sp, 0x10 diff --git a/tests/end_to_end/loop_optimizations/irix-o2.s b/tests/end_to_end/loop_optimizations/irix-o2.s deleted file mode 100644 index 3bf77a9d..00000000 --- a/tests/end_to_end/loop_optimizations/irix-o2.s +++ /dev/null @@ -1,139 +0,0 @@ -.set noat # allow manual use of $at -.set noreorder # don't insert nops after branches - - -glabel test -/* 000090 00400090 27BDFFF0 */ addiu $sp, $sp, -0x10 -/* 000094 00400094 18A00015 */ blez $a1, .L004000EC -/* 000098 00400098 00001025 */ move $v0, $zero -/* 00009C 0040009C 30A80003 */ andi $t0, $a1, 3 -/* 0000A0 004000A0 11000009 */ beqz $t0, .L004000C8 -/* 0000A4 004000A4 01003825 */ move $a3, $t0 -/* 0000A8 004000A8 00801821 */ move $v1, $a0 -/* 0000AC 004000AC 24060001 */ addiu $a2, $zero, 1 -.L004000B0: -/* 0000B0 004000B0 00C01025 */ move $v0, $a2 -/* 0000B4 004000B4 A0600000 */ sb $zero, ($v1) -/* 0000B8 004000B8 24630001 */ addiu $v1, $v1, 1 -/* 0000BC 004000BC 14E6FFFC */ bne $a3, $a2, .L004000B0 -/* 0000C0 004000C0 24C60001 */ addiu $a2, $a2, 1 -/* 0000C4 004000C4 10450008 */ beq $v0, $a1, .L004000E8 -.L004000C8: -/* 0000C8 004000C8 00821821 */ addu $v1, $a0, $v0 -.L004000CC: -/* 0000CC 004000CC 24420004 */ addiu $v0, $v0, 4 -/* 0000D0 004000D0 A0600001 */ sb $zero, 1($v1) -/* 0000D4 004000D4 A0600002 */ sb $zero, 2($v1) -/* 0000D8 004000D8 A0600003 */ sb $zero, 3($v1) -/* 0000DC 004000DC 24630004 */ addiu $v1, $v1, 4 -/* 0000E0 004000E0 1445FFFA */ bne $v0, $a1, .L004000CC -/* 0000E4 004000E4 A060FFFC */ sb $zero, -4($v1) -.L004000E8: -/* 0000E8 004000E8 00001025 */ move $v0, $zero -.L004000EC: -/* 0000EC 004000EC 18A0000C */ blez $a1, .L00400120 -/* 0000F0 004000F0 28A10002 */ slti $at, $a1, 2 -/* 0000F4 004000F4 30A80003 */ andi $t0, $a1, 3 -/* 0000F8 004000F8 11000006 */ beqz $t0, .L00400114 -/* 0000FC 004000FC 01001825 */ move $v1, $t0 -/* 000100 00400100 24460001 */ addiu $a2, $v0, 1 -.L00400104: -/* 000104 00400104 00C01025 */ move $v0, $a2 -/* 000108 00400108 1466FFFE */ bne $v1, $a2, .L00400104 -/* 00010C 0040010C 24C60001 */ addiu $a2, $a2, 1 -/* 000110 00400110 10450003 */ beq $v0, $a1, .L00400120 -.L00400114: -/* 000114 00400114 24420004 */ addiu $v0, $v0, 4 -.L00400118: -/* 000118 00400118 5445FFFF */ bnel $v0, $a1, .L00400118 -/* 00011C 0040011C 24420004 */ addiu $v0, $v0, 4 -.L00400120: -/* 000120 00400120 24030001 */ addiu $v1, $zero, 1 -/* 000124 00400124 14200023 */ bnez $at, .L004001B4 -/* 000128 00400128 24020001 */ addiu $v0, $zero, 1 -/* 00012C 0040012C 24A7FFFF */ addiu $a3, $a1, -1 -/* 000130 00400130 30EE0003 */ andi $t6, $a3, 3 -/* 000134 00400134 11C00008 */ beqz $t6, .L00400158 -/* 000138 00400138 25C40001 */ addiu $a0, $t6, 1 -/* 00013C 0040013C 24460001 */ addiu $a2, $v0, 1 -.L00400140: -/* 000140 00400140 00620019 */ multu $v1, $v0 -/* 000144 00400144 00C01025 */ move $v0, $a2 -/* 000148 00400148 00001812 */ mflo $v1 -/* 00014C 0040014C 1486FFFC */ bne $a0, $a2, .L00400140 -/* 000150 00400150 24C60001 */ addiu $a2, $a2, 1 -/* 000154 00400154 10450017 */ beq $v0, $a1, .L004001B4 -.L00400158: -/* 000158 00400158 24460001 */ addiu $a2, $v0, 1 -/* 00015C 0040015C 24440002 */ addiu $a0, $v0, 2 -/* 000160 00400160 24470003 */ addiu $a3, $v0, 3 -.L00400164: -/* 000164 00400164 00620019 */ multu $v1, $v0 -/* 000168 00400168 24420004 */ addiu $v0, $v0, 4 -/* 00016C 0040016C 00001812 */ mflo $v1 -/* 000170 00400170 00000000 */ nop -/* 000174 00400174 00000000 */ nop -/* 000178 00400178 00660019 */ multu $v1, $a2 -/* 00017C 0040017C 24C60004 */ addiu $a2, $a2, 4 -/* 000180 00400180 00001812 */ mflo $v1 -/* 000184 00400184 00000000 */ nop -/* 000188 00400188 00000000 */ nop -/* 00018C 0040018C 00640019 */ multu $v1, $a0 -/* 000190 00400190 24840004 */ addiu $a0, $a0, 4 -/* 000194 00400194 00001812 */ mflo $v1 -/* 000198 00400198 00000000 */ nop -/* 00019C 0040019C 00000000 */ nop -/* 0001A0 004001A0 00670019 */ multu $v1, $a3 -/* 0001A4 004001A4 24E70004 */ addiu $a3, $a3, 4 -/* 0001A8 004001A8 00001812 */ mflo $v1 -/* 0001AC 004001AC 1445FFED */ bne $v0, $a1, .L00400164 -/* 0001B0 004001B0 00000000 */ nop -.L004001B4: -/* 0001B4 004001B4 18A00027 */ blez $a1, .L00400254 -/* 0001B8 004001B8 00001025 */ move $v0, $zero -/* 0001BC 004001BC 30A80003 */ andi $t0, $a1, 3 -/* 0001C0 004001C0 1100000B */ beqz $t0, .L004001F0 -/* 0001C4 004001C4 01002025 */ move $a0, $t0 -/* 0001C8 004001C8 24060001 */ addiu $a2, $zero, 1 -/* 0001CC 004001CC 8FA30000 */ lw $v1, ($sp) -.L004001D0: -/* 0001D0 004001D0 00621821 */ addu $v1, $v1, $v0 -/* 0001D4 004001D4 00620019 */ multu $v1, $v0 -/* 0001D8 004001D8 00C01025 */ move $v0, $a2 -/* 0001DC 004001DC 00001812 */ mflo $v1 -/* 0001E0 004001E0 1486FFFB */ bne $a0, $a2, .L004001D0 -/* 0001E4 004001E4 24C60001 */ addiu $a2, $a2, 1 -/* 0001E8 004001E8 1045001A */ beq $v0, $a1, .L00400254 -/* 0001EC 004001EC AFA30000 */ sw $v1, ($sp) -.L004001F0: -/* 0001F0 004001F0 8FA30000 */ lw $v1, ($sp) -/* 0001F4 004001F4 24460001 */ addiu $a2, $v0, 1 -/* 0001F8 004001F8 24440002 */ addiu $a0, $v0, 2 -/* 0001FC 004001FC 24470003 */ addiu $a3, $v0, 3 -/* 000200 00400200 00621821 */ addu $v1, $v1, $v0 -.L00400204: -/* 000204 00400204 00620019 */ multu $v1, $v0 -/* 000208 00400208 00001812 */ mflo $v1 -/* 00020C 0040020C 00621821 */ addu $v1, $v1, $v0 -/* 000210 00400210 24630001 */ addiu $v1, $v1, 1 -/* 000214 00400214 00660019 */ multu $v1, $a2 -/* 000218 00400218 24C60004 */ addiu $a2, $a2, 4 -/* 00021C 0040021C 00001812 */ mflo $v1 -/* 000220 00400220 00621821 */ addu $v1, $v1, $v0 -/* 000224 00400224 24630002 */ addiu $v1, $v1, 2 -/* 000228 00400228 00640019 */ multu $v1, $a0 -/* 00022C 0040022C 24840004 */ addiu $a0, $a0, 4 -/* 000230 00400230 00001812 */ mflo $v1 -/* 000234 00400234 00621821 */ addu $v1, $v1, $v0 -/* 000238 00400238 24630003 */ addiu $v1, $v1, 3 -/* 00023C 0040023C 00670019 */ multu $v1, $a3 -/* 000240 00400240 24420004 */ addiu $v0, $v0, 4 -/* 000244 00400244 24E70004 */ addiu $a3, $a3, 4 -/* 000248 00400248 00001812 */ mflo $v1 -/* 00024C 0040024C 5445FFED */ bnel $v0, $a1, .L00400204 -/* 000250 00400250 00621821 */ addu $v1, $v1, $v0 -.L00400254: -/* 000254 00400254 03E00008 */ jr $ra -/* 000258 00400258 27BD0010 */ addiu $sp, $sp, 0x10 - -/* 00025C 0040025C 00000000 */ nop diff --git a/tests/end_to_end/loop_optimizations/orig.c b/tests/end_to_end/loop_optimizations/orig.c deleted file mode 100644 index 737219ea..00000000 --- a/tests/end_to_end/loop_optimizations/orig.c +++ /dev/null @@ -1,33 +0,0 @@ -void test(char *foo, int length) -{ - int i; - int total1, total2, total3; - - // array zeroing - for (i = 0; i < length; i++) - { - foo[i] = 0; - } - - // simple addition - total1 = 0; - for (i = 0; i < length; i++) - { - total1 += 1; - } - - // simple multiplication - total2 = 1; - for (i = 1; i < length; i++) - { - total2 *= i; - } - - // addition and multiplication - total3 = 0; - for (i = 0; i < length; i++) - { - total3 += i; - total3 *= i; - } -} From 49fbcfa19dc53a175bece4cd632dc6131a558d81 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sat, 2 May 2020 21:18:13 -0700 Subject: [PATCH 28/54] Improve detection of do-whiles --- src/if_statements.py | 32 ++++++++++++++++------ tests/end_to_end/loop_nested/irix-g-out.c | 12 ++------ tests/end_to_end/loop_nested/irix-o2-out.c | 28 ++++++++----------- 3 files changed, 37 insertions(+), 35 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 4bb5fefc..15e66940 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -122,9 +122,9 @@ def __str__(self) -> str: brace_after_do = f"\n{space}{{" if self.coding_style.newline_after_if else " {" cond = stringify_expr(self.condition).rstrip(";") if self.condition else "" - body = f"\n".join(f"{stmt}" for stmt in self.body.statements) string_components = [ - f"{space}do{brace_after_do}{body}", + f"{space}do{brace_after_do}", + str(self.body), f"{space}}} while ({cond});", ] return "\n".join(string_components) @@ -550,21 +550,34 @@ def add_return_statement( def pattern_match_simple_do_while_loop( context: Context, start: ConditionalNode, indent: int ) -> Optional[DoWhileLoop]: - if not start.is_self_loop(): + # We detect edges that are accompanied by their reverse as loops. + if start.is_self_loop(): + + loop_body = Body(False, []) + emit_node(context, start, loop_body, indent + 4) + elif ( + isinstance(start.conditional_edge, ConditionalNode) + and start.conditional_edge.conditional_edge is start + # we only want to go through loops in one direction. + and not start.is_loop() + ): + loop_body = Body(False, []) + emit_node(context, start, loop_body, indent + 4) + loop_body = build_flowgraph_between( + context, start.fallthrough_edge, start.conditional_edge, indent + 4 + ) + emit_node(context, start.conditional_edge, loop_body, indent + 4) + else: return None assert start.block.block_info assert start.block.block_info.branch_condition - - loop_body = Body(False, []) - emit_node(context, start, loop_body, indent + 4) - do_while = DoWhileLoop( + return DoWhileLoop( indent, context.options.coding_style, loop_body, start.block.block_info.branch_condition, ) - return do_while def build_flowgraph_between( @@ -595,7 +608,8 @@ def build_flowgraph_between( ) if do_while_loop: body.add_do_while_loop(do_while_loop) - curr_start = curr_start.fallthrough_edge + assert isinstance(curr_start.conditional_edge, ConditionalNode) + curr_start = curr_start.conditional_edge.fallthrough_edge continue # If a node is ever encountered twice, we can emit a goto to the diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 607821d9..c45d984d 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -8,21 +8,15 @@ s32 test(s32 arg0) sp8 = 0; if (spC < arg0) { -loop_1: - sp4 = 0; - if (sp4 < arg0) + do { do { sp8 = sp8 + (spC * sp4); sp4 = sp4 + 1; } while ((sp4 < arg0) != 0); - } - spC = spC + 1; - if (spC < arg0) - { - goto loop_1; - } + spC = spC + 1; + } while ((sp4 < arg0) == 0); } return sp8; } diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index 60b2f539..b34ba438 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -7,38 +7,32 @@ s32 test(s32 arg0) s32 phi_v1; s32 phi_v1_2; s32 phi_v1_3; - s32 phi_v1_4; s32 phi_a2; + s32 phi_v1_4; phi_v0 = 0; phi_v1 = 0; - phi_v1_3 = 0; + phi_v1_4 = 0; if (arg0 > 0) { -loop_1: - phi_v1_2 = phi_v1_3; - if (arg0 > 0) + do { phi_a3 = 1; - phi_v1_4 = phi_v1_3; + phi_v1_3 = phi_v1_4; phi_a2 = phi_v0 * 0; do { - temp_v1 = phi_v1_4 + phi_a2; + temp_v1 = phi_v1_3 + phi_a2; phi_a3 = phi_a3 + 1; phi_v1_2 = temp_v1; - phi_v1_4 = temp_v1; + phi_v1_3 = temp_v1; phi_a2 = phi_a2 + phi_v0; } while (arg0 != phi_a3); - } - temp_v0 = phi_v0 + 1; - phi_v0 = temp_v0; - phi_v1 = phi_v1_2; - phi_v1_3 = phi_v1_2; - if (temp_v0 != arg0) - { - goto loop_1; - } + temp_v0 = phi_v0 + 1; + phi_v0 = temp_v0; + phi_v1 = phi_v1_2; + phi_v1_4 = phi_v1_2; + } while (arg0 <= 0); } return phi_v1; } From c359b3b3120705fff1413740e457d4e3e61fa454 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sat, 2 May 2020 21:28:34 -0700 Subject: [PATCH 29/54] Clean up loop detection a bit --- src/if_statements.py | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 15e66940..579f7c41 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -547,28 +547,26 @@ def add_return_statement( body.add_statement(SimpleStatement(indent, "return;")) -def pattern_match_simple_do_while_loop( +def detect_loop( context: Context, start: ConditionalNode, indent: int ) -> Optional[DoWhileLoop]: # We detect edges that are accompanied by their reverse as loops. - if start.is_self_loop(): - - loop_body = Body(False, []) - emit_node(context, start, loop_body, indent + 4) - elif ( + if not ( isinstance(start.conditional_edge, ConditionalNode) and start.conditional_edge.conditional_edge is start - # we only want to go through loops in one direction. - and not start.is_loop() ): - loop_body = Body(False, []) - emit_node(context, start, loop_body, indent + 4) + return None + + loop_body = Body(False, []) + emit_node(context, start, loop_body, indent + 4) + + if not start.is_self_loop(): + # There are more nodes to emit, "between" the start node + # and the loop edge that it connects to: loop_body = build_flowgraph_between( context, start.fallthrough_edge, start.conditional_edge, indent + 4 ) emit_node(context, start.conditional_edge, loop_body, indent + 4) - else: - return None assert start.block.block_info assert start.block.block_info.branch_condition @@ -603,12 +601,11 @@ def build_flowgraph_between( # rooted at curr_start against certain predefined subgraphs # that emit do-while-loops: if isinstance(curr_start, ConditionalNode): - do_while_loop = pattern_match_simple_do_while_loop( - context, curr_start, indent - ) + do_while_loop = detect_loop(context, curr_start, indent) if do_while_loop: body.add_do_while_loop(do_while_loop) assert isinstance(curr_start.conditional_edge, ConditionalNode) + # Move past the loop: curr_start = curr_start.conditional_edge.fallthrough_edge continue From 8073b65528c78124ff5aed76b32befff92bc57ec Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sat, 2 May 2020 22:09:22 -0700 Subject: [PATCH 30/54] Use imm.pdom heuristic for loop detection --- src/if_statements.py | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 579f7c41..be06da06 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -548,12 +548,17 @@ def add_return_statement( def detect_loop( - context: Context, start: ConditionalNode, indent: int + context: Context, start: ConditionalNode, end: Node, indent: int ) -> Optional[DoWhileLoop]: # We detect edges that are accompanied by their reverse as loops. + imm_pdom: Node + if start.is_self_loop(): + imm_pdom = start + else: + imm_pdom = immediate_postdominator(context, start, end) + if not ( - isinstance(start.conditional_edge, ConditionalNode) - and start.conditional_edge.conditional_edge is start + isinstance(imm_pdom, ConditionalNode) and imm_pdom.conditional_edge is start ): return None @@ -564,22 +569,26 @@ def detect_loop( # There are more nodes to emit, "between" the start node # and the loop edge that it connects to: loop_body = build_flowgraph_between( - context, start.fallthrough_edge, start.conditional_edge, indent + 4 + context, start, imm_pdom, indent + 4, skip_loop_detection=True, ) - emit_node(context, start.conditional_edge, loop_body, indent + 4) + emit_node(context, imm_pdom, loop_body, indent + 4) - assert start.block.block_info - assert start.block.block_info.branch_condition + assert imm_pdom.block.block_info + assert imm_pdom.block.block_info.branch_condition return DoWhileLoop( indent, context.options.coding_style, loop_body, - start.block.block_info.branch_condition, + imm_pdom.block.block_info.branch_condition, ) def build_flowgraph_between( - context: Context, start: Node, end: Node, indent: int + context: Context, + start: Node, + end: Node, + indent: int, + skip_loop_detection: bool = False, ) -> Body: """ Output a section of a flow graph that has already been translated to our @@ -600,13 +609,17 @@ def build_flowgraph_between( # Before we do anything else, we pattern-match the subgraph # rooted at curr_start against certain predefined subgraphs # that emit do-while-loops: - if isinstance(curr_start, ConditionalNode): - do_while_loop = detect_loop(context, curr_start, indent) + if not skip_loop_detection and isinstance(curr_start, ConditionalNode): + do_while_loop = detect_loop(context, curr_start, end, indent) if do_while_loop: body.add_do_while_loop(do_while_loop) - assert isinstance(curr_start.conditional_edge, ConditionalNode) # Move past the loop: - curr_start = curr_start.conditional_edge.fallthrough_edge + if curr_start.is_self_loop(): + curr_start = curr_start.fallthrough_edge + else: + imm_pdom = immediate_postdominator(context, curr_start, end) + assert isinstance(imm_pdom, ConditionalNode) + curr_start = imm_pdom.fallthrough_edge continue # If a node is ever encountered twice, we can emit a goto to the From ce9ba4e6cf074ee69b06c1b933906283aa8f3e54 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Sat, 2 May 2020 22:09:44 -0700 Subject: [PATCH 31/54] Actually run tests --- tests/end_to_end/loop_nested/irix-g-out.c | 13 +++++--- tests/end_to_end/loop_nested/irix-o2-out.c | 31 ++++++++++--------- tests/end_to_end/loop_with_if/irix-g-out.c | 16 +++++----- .../loop_with_if/irix-o2-allman-out.c | 26 +++++++--------- tests/end_to_end/loop_with_if/irix-o2-out.c | 20 ++++++------ 5 files changed, 53 insertions(+), 53 deletions(-) diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 4bf409c3..82c524e3 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -7,12 +7,15 @@ s32 test(s32 arg0) { sp8 = 0; if (spC < arg0) { do { - do { - sp8 = sp8 + (spC * sp4); - sp4 = sp4 + 1; - } while ((sp4 < arg0) != 0); + sp4 = 0; + if (sp4 < arg0) { + do { + sp8 = sp8 + (spC * sp4); + sp4 = sp4 + 1; + } while ((sp4 < arg0) != 0); + } spC = spC + 1; - } while ((sp4 < arg0) == 0); + } while ((spC < arg0) != 0); } return sp8; } diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index 8c9f20b3..10f4657d 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -6,29 +6,32 @@ s32 test(s32 arg0) { s32 phi_v1; s32 phi_v1_2; s32 phi_v1_3; - s32 phi_v1_4; s32 phi_a2; + s32 phi_v1_4; phi_v0 = 0; phi_v1 = 0; - phi_v1_3 = 0; + phi_v1_4 = 0; if (arg0 > 0) { do { - phi_a3 = 1; - phi_v1_4 = phi_v1_3; - phi_a2 = phi_v0 * 0; - do { - temp_v1 = phi_v1_4 + phi_a2; - phi_a3 = phi_a3 + 1; - phi_v1_2 = temp_v1; - phi_v1_4 = temp_v1; - phi_a2 = phi_a2 + phi_v0; - } while (arg0 != phi_a3); + phi_v1_2 = phi_v1_4; + if (arg0 > 0) { + phi_a3 = 1; + phi_v1_3 = phi_v1_4; + phi_a2 = phi_v0 * 0; + do { + temp_v1 = phi_v1_3 + phi_a2; + phi_a3 = phi_a3 + 1; + phi_v1_2 = temp_v1; + phi_v1_3 = temp_v1; + phi_a2 = phi_a2 + phi_v0; + } while (arg0 != phi_a3); + } temp_v0 = phi_v0 + 1; phi_v0 = temp_v0; phi_v1 = phi_v1_2; - phi_v1_3 = phi_v1_2; - } while (arg0 <= 0); + phi_v1_4 = phi_v1_2; + } while (temp_v0 != arg0); } return phi_v1; } diff --git a/tests/end_to_end/loop_with_if/irix-g-out.c b/tests/end_to_end/loop_with_if/irix-g-out.c index c505b6a9..154614cd 100644 --- a/tests/end_to_end/loop_with_if/irix-g-out.c +++ b/tests/end_to_end/loop_with_if/irix-g-out.c @@ -3,15 +3,13 @@ s32 test(s32 arg0) { sp4 = 0; if (sp4 < arg0) { -loop_1: - if (sp4 == 5) { - sp4 = sp4 * 2; - } else { - sp4 = sp4 + 4; - } - if (sp4 < arg0) { - goto loop_1; - } + do { + if (sp4 == 5) { + sp4 = sp4 * 2; + } else { + sp4 = sp4 + 4; + } + } while ((sp4 < arg0) != 0); } return sp4; } diff --git a/tests/end_to_end/loop_with_if/irix-o2-allman-out.c b/tests/end_to_end/loop_with_if/irix-o2-allman-out.c index 2e1c2503..326b5284 100644 --- a/tests/end_to_end/loop_with_if/irix-o2-allman-out.c +++ b/tests/end_to_end/loop_with_if/irix-o2-allman-out.c @@ -8,21 +8,19 @@ s32 test(s32 arg0) if (arg0 > 0) { phi_v1 = 0; -loop_2: - if (phi_v1 == 5) + do { - phi_v1_2 = phi_v1 * 2; - } - else - { - phi_v1_2 = phi_v1 + 4; - } - phi_v1 = phi_v1_2; - phi_v1_3 = phi_v1_2; - if (phi_v1_2 < arg0) - { - goto loop_2; - } + if (phi_v1 == 5) + { + phi_v1_2 = phi_v1 * 2; + } + else + { + phi_v1_2 = phi_v1 + 4; + } + phi_v1 = phi_v1_2; + phi_v1_3 = phi_v1_2; + } while ((phi_v1_2 < arg0) != 0); } return phi_v1_3; } diff --git a/tests/end_to_end/loop_with_if/irix-o2-out.c b/tests/end_to_end/loop_with_if/irix-o2-out.c index 8afd0d39..cb147c9d 100644 --- a/tests/end_to_end/loop_with_if/irix-o2-out.c +++ b/tests/end_to_end/loop_with_if/irix-o2-out.c @@ -6,17 +6,15 @@ s32 test(s32 arg0) { phi_v1_3 = 0; if (arg0 > 0) { phi_v1 = 0; -loop_2: - if (phi_v1 == 5) { - phi_v1_2 = phi_v1 * 2; - } else { - phi_v1_2 = phi_v1 + 4; - } - phi_v1 = phi_v1_2; - phi_v1_3 = phi_v1_2; - if (phi_v1_2 < arg0) { - goto loop_2; - } + do { + if (phi_v1 == 5) { + phi_v1_2 = phi_v1 * 2; + } else { + phi_v1_2 = phi_v1 + 4; + } + phi_v1 = phi_v1_2; + phi_v1_3 = phi_v1_2; + } while ((phi_v1_2 < arg0) != 0); } return phi_v1_3; } From cc67261ff013d6da6e108af68201541446b01ab2 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Mon, 4 May 2020 23:24:18 -0700 Subject: [PATCH 32/54] Move parent generation alongside dominators --- src/flow_graph.py | 38 ++++++++++++++----- ...{flow_graph_munge.py => loop_rerolling.py} | 34 ++++++++--------- src/main.py | 6 +-- 3 files changed, 48 insertions(+), 30 deletions(-) rename src/{flow_graph_munge.py => loop_rerolling.py} (78%) diff --git a/src/flow_graph.py b/src/flow_graph.py index 31783dcb..9b7c14e7 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -509,7 +509,7 @@ def process(item: Union[Instruction, Label]) -> None: def is_self_loop_edge(node: "Node", edge: "Node") -> bool: - return edge.block.index == node.block.index + return node is edge def is_loop_edge(node: "Node", edge: "Node") -> bool: @@ -542,14 +542,15 @@ def to_basic_node(self, successor: "Node") -> "BasicNode": def add_parent(self, parent: "Node") -> None: self.parents.add(parent) - def remove_parent(self, parent: "Node") -> None: - self.parents.remove(parent) - def replace_parent(self, replace_this: "Node", with_this: "Node") -> None: if replace_this in self.parents: self.parents.remove(replace_this) self.parents.add(with_this) + @abc.abstractmethod + def children(self) -> List["Node"]: + ... + @abc.abstractmethod def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: ... @@ -565,11 +566,13 @@ class BasicNode(BaseNode): def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: if self.successor is replace_this: self.successor = with_this - with_this.add_parent(self) def is_loop(self) -> bool: return is_loop_edge(self, self.successor) + def children(self) -> List["Node"]: + return [self.successor] + def __str__(self) -> str: return "".join( [ @@ -588,10 +591,8 @@ class ConditionalNode(BaseNode): def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: if self.conditional_edge is replace_this: self.conditional_edge = with_this - with_this.add_parent(self) if self.fallthrough_edge is replace_this: self.fallthrough_edge = with_this - with_this.add_parent(self) def is_self_loop(self) -> bool: return is_self_loop_edge(self, self.conditional_edge) @@ -599,6 +600,9 @@ def is_self_loop(self) -> bool: def is_loop(self) -> bool: return is_loop_edge(self, self.conditional_edge) + def children(self) -> List["Node"]: + return [self.conditional_edge, self.fallthrough_edge] + def __str__(self) -> str: return "".join( [ @@ -623,6 +627,9 @@ def name(self) -> str: name = super().name() return name if self.is_real() else f"{name}.{self.index}" + def children(self) -> List["Node"]: + return [] + def is_real(self) -> bool: return self.index == 0 @@ -644,6 +651,9 @@ def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: new_cases.append(case) self.cases = new_cases + def children(self) -> List["Node"]: + return self.cases + def __str__(self) -> str: targets = ", ".join(str(c.block.index) for c in self.cases) return f"{self.block}\n# {self.block.index} -> {targets}" @@ -911,11 +921,21 @@ def ensure_fallthrough(nodes: List[Node]) -> None: pre.emit_goto = True -def compute_dominators(nodes: List[Node]) -> None: +def compute_parents(nodes: List[Node]) -> None: + for node in nodes: + node.parents = set() + for node in nodes: + for child in node.children(): + child.parents.add(node) + + +def compute_dominators_and_parents(nodes: List[Node]) -> None: + """Compute or recompute the dominators and parents of the given nodes.""" for node in nodes: node.dominators = set() node.immediate_dominator = None node.immediately_dominates = [] + compute_parents(nodes) entry = nodes[0] entry.dominators = {entry} @@ -964,7 +984,7 @@ def build_flowgraph(function: Function, rodata: Rodata) -> FlowGraph: nodes = build_nodes(function, blocks, rodata) nodes = duplicate_premature_returns(nodes) ensure_fallthrough(nodes) - compute_dominators(nodes) + compute_dominators_and_parents(nodes) return FlowGraph(nodes) diff --git a/src/flow_graph_munge.py b/src/loop_rerolling.py similarity index 78% rename from src/flow_graph_munge.py rename to src/loop_rerolling.py index 01a6039b..9667f456 100644 --- a/src/flow_graph_munge.py +++ b/src/loop_rerolling.py @@ -1,34 +1,34 @@ from typing import List -from .flow_graph import BasicNode, ConditionalNode, FlowGraph, Node, compute_dominators +from .flow_graph import ( + BasicNode, + ConditionalNode, + FlowGraph, + Node, + compute_dominators_and_parents, +) from .parse_instruction import Instruction def replace_node_references( - flow_graph: FlowGraph, replace_this: Node, with_this: Node, replace_parent: bool + flow_graph: FlowGraph, replace_this: Node, with_this: Node ) -> None: for node_to_modify in flow_graph.nodes: node_to_modify.replace_any_children(replace_this, with_this) - if replace_this in node_to_modify.parents: - if replace_parent: - node_to_modify.replace_parent(replace_this, with_this) - else: - node_to_modify.remove_parent(replace_this) - compute_dominators(flow_graph.nodes) def remove_node(flow_graph: FlowGraph, to_delete: Node, new_child: Node) -> None: flow_graph.nodes.remove(to_delete) - replace_node_references(flow_graph, to_delete, new_child, False) + replace_node_references(flow_graph, to_delete, new_child) def replace_node(flow_graph: FlowGraph, replace_this: Node, with_this: Node) -> None: replacement_index = flow_graph.nodes.index(replace_this) flow_graph.nodes[replacement_index] = with_this - replace_node_references(flow_graph, replace_this, with_this, True) + replace_node_references(flow_graph, replace_this, with_this) -def unroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: +def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: node_1 = start.fallthrough_edge node_7 = start.conditional_edge @@ -94,11 +94,12 @@ def modify_node_1_instructions(instructions: List[Instruction]) -> bool: replace_node(flow_graph, node_1, new_node_1) # now it does remove_node(flow_graph, node_4, node_7) remove_node(flow_graph, node_5, node_7) - remove_node(flow_graph, node_6, node_7) # TODO: assert didn't execute anything? + remove_node(flow_graph, node_6, node_7) # TODO: assert didn't execute anything?. + return True -def munge_unrolled_loops(flow_graph: FlowGraph) -> FlowGraph: +def reroll_loops(flow_graph: FlowGraph) -> FlowGraph: # TODO: What if knocking out nodes reveals another set of nodes # that look identical? We will incorrectly be merging two # adjacent for-loops. @@ -108,11 +109,8 @@ def munge_unrolled_loops(flow_graph: FlowGraph) -> FlowGraph: for node in flow_graph.nodes: if not isinstance(node, ConditionalNode): continue - changed = unroll_loop(flow_graph, node) + changed = reroll_loop(flow_graph, node) if changed: break + compute_dominators_and_parents(flow_graph.nodes) return flow_graph - - -def munge_flowgraph(flow_graph: FlowGraph) -> FlowGraph: - return munge_unrolled_loops(flow_graph) diff --git a/src/main.py b/src/main.py index 324ea1d3..828dc30b 100644 --- a/src/main.py +++ b/src/main.py @@ -5,7 +5,7 @@ from .c_types import TypeMap, build_typemap, dump_typemap from .error import DecompFailure from .flow_graph import FlowGraph, build_flowgraph, visualize_flowgraph -from .flow_graph_munge import munge_flowgraph +from .loop_rerolling import reroll_loops from .if_statements import get_function_text from .options import CodingStyle, Options from .parse_file import Function, MIPSFile, Rodata, parse_file @@ -22,7 +22,7 @@ def decompile_function( flowgraph: FlowGraph = build_flowgraph(function, rodata) if options.loop_rerolling: - flowgraph = munge_flowgraph(flowgraph) + flowgraph = reroll_loops(flowgraph) if options.visualize_flowgraph: visualize_flowgraph(flowgraph) @@ -126,7 +126,7 @@ def parse_flags(flags: List[str]) -> Options: parser.add_argument( "--no-reroll", dest="loop_rerolling", - help="disable emitting for-loops by un-unrolling (rerolling) while-loops", + help="disable detection and fixing of unrolled loops", action="store_false", ) parser.add_argument( From 90c8f2c64fffedfbbd2afad6cf55c0287b9ff397 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 23 Jun 2021 20:25:32 -0700 Subject: [PATCH 33/54] Fix indentation of do-while loops --- src/if_statements.py | 15 ++++++----- tests/end_to_end/andor_return/irix-g.s | 27 +++++++++++++++++++ tests/end_to_end/andor_return/irix-o2.s | 26 ++++++++++++++++++ tests/end_to_end/andor_return/orig.c | 6 +++++ tests/end_to_end/loop_nested/irix-g-out.c | 4 +-- tests/end_to_end/loop_nested/irix-o2-out.c | 18 ++++++------- tests/end_to_end/loop_with_if/irix-g-out.c | 6 ++--- .../loop_with_if/irix-o2-allman-out.c | 6 ++--- tests/end_to_end/loop_with_if/irix-o2-out.c | 10 +++---- 9 files changed, 89 insertions(+), 29 deletions(-) create mode 100644 tests/end_to_end/andor_return/irix-g.s create mode 100644 tests/end_to_end/andor_return/irix-o2.s create mode 100644 tests/end_to_end/andor_return/orig.c diff --git a/src/if_statements.py b/src/if_statements.py index 87e48555..055e3008 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -166,13 +166,14 @@ def format(self, fmt: Formatter) -> str: space = fmt.indent("") after_do = f"\n{space}" if fmt.coding_style.newline_after_if else " " cond = format_expr(self.condition, fmt) if self.condition else "" - return "\n".join( - [ - f"{space}do{after_do}{{", - self.body.format(fmt), - f"{space}}} while ({cond});", - ] - ) + with fmt.indented(): + return "\n".join( + [ + f"{space}do{after_do}{{", + self.body.format(fmt), + f"{space}}} while ({cond});", + ] + ) Statement = Union[ diff --git a/tests/end_to_end/andor_return/irix-g.s b/tests/end_to_end/andor_return/irix-g.s new file mode 100644 index 00000000..dd84396a --- /dev/null +++ b/tests/end_to_end/andor_return/irix-g.s @@ -0,0 +1,27 @@ +.set noat # allow manual use of $at +.set noreorder # don't insert nops after branches + + +glabel test +/* 000090 00400090 14800003 */ bnez $a0, .L004000A0 +/* 000094 00400094 00000000 */ nop +/* 000098 00400098 10A00007 */ beqz $a1, .L004000B8 +/* 00009C 0040009C 00000000 */ nop +.L004000A0: +/* 0000A0 004000A0 14C00003 */ bnez $a2, .L004000B0 +/* 0000A4 004000A4 00000000 */ nop +/* 0000A8 004000A8 10E00003 */ beqz $a3, .L004000B8 +/* 0000AC 004000AC 00000000 */ nop +.L004000B0: +/* 0000B0 004000B0 03E00008 */ jr $ra +/* 0000B4 004000B4 00851021 */ addu $v0, $a0, $a1 + +.L004000B8: +/* 0000B8 004000B8 03E00008 */ jr $ra +/* 0000BC 004000BC 00C71021 */ addu $v0, $a2, $a3 + +/* 0000C0 004000C0 03E00008 */ jr $ra +/* 0000C4 004000C4 00000000 */ nop + +/* 0000C8 004000C8 03E00008 */ jr $ra +/* 0000CC 004000CC 00000000 */ nop diff --git a/tests/end_to_end/andor_return/irix-o2.s b/tests/end_to_end/andor_return/irix-o2.s new file mode 100644 index 00000000..8131ee17 --- /dev/null +++ b/tests/end_to_end/andor_return/irix-o2.s @@ -0,0 +1,26 @@ +.set noat # allow manual use of $at +.set noreorder # don't insert nops after branches + + +glabel test +/* 000090 00400090 14800003 */ bnez $a0, .L004000A0 +/* 000094 00400094 00000000 */ nop +/* 000098 00400098 50A00008 */ beql $a1, $zero, .L004000BC +/* 00009C 0040009C 00C71021 */ addu $v0, $a2, $a3 +.L004000A0: +/* 0000A0 004000A0 14C00003 */ bnez $a2, .L004000B0 +/* 0000A4 004000A4 00000000 */ nop +/* 0000A8 004000A8 50E00004 */ beql $a3, $zero, .L004000BC +/* 0000AC 004000AC 00C71021 */ addu $v0, $a2, $a3 +.L004000B0: +/* 0000B0 004000B0 03E00008 */ jr $ra +/* 0000B4 004000B4 00851021 */ addu $v0, $a0, $a1 + +/* 0000B8 004000B8 00C71021 */ addu $v0, $a2, $a3 +.L004000BC: +/* 0000BC 004000BC 03E00008 */ jr $ra +/* 0000C0 004000C0 00000000 */ nop + +/* 0000C4 004000C4 00000000 */ nop +/* 0000C8 004000C8 00000000 */ nop +/* 0000CC 004000CC 00000000 */ nop diff --git a/tests/end_to_end/andor_return/orig.c b/tests/end_to_end/andor_return/orig.c new file mode 100644 index 00000000..79c9e54d --- /dev/null +++ b/tests/end_to_end/andor_return/orig.c @@ -0,0 +1,6 @@ +int test(int a, int b, int c, int d) { + if ((a || b) && (c || d)) { + return a + b; + } + return c + d; +} diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 478ef09a..90fc6dbb 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -7,8 +7,8 @@ s32 test(s32 arg0) { sp8 = 0; if (spC < arg0) { do { - goto loop_1; - spC += 1; + goto loop_1; + spC += 1; } while ((spC < arg0) != 0); } return sp8; diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index d36c6799..67e42164 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -14,18 +14,14 @@ s32 test(s32 arg0) { phi_v1_3 = 0; if (arg0 > 0) { do { - goto loop_1; - temp_v0 = phi_v0 + 1; - phi_v0 = temp_v0; - phi_v1 = phi_v1_2; - phi_v1_3 = phi_v1_2; + goto loop_1; + temp_v0 = phi_v0 + 1; + phi_v0 = temp_v0; + phi_v1 = phi_v1_2; + phi_v1_3 = phi_v1_2; } while (temp_v0 != arg0); } return phi_v1; - // bug: did not emit code for node #3; contents below: - phi_a3 = 1; - phi_v1_4 = phi_v1_3; - phi_a2 = phi_v0 * 0; // bug: did not emit code for node #4; contents below: temp_v1 = phi_v1_4 + phi_a2; phi_a3 += 1; @@ -33,4 +29,8 @@ s32 test(s32 arg0) { phi_v1_4 = temp_v1; phi_a2 += phi_v0; // bug: did not emit code for node #2; contents below: + // bug: did not emit code for node #3; contents below: + phi_a3 = 1; + phi_v1_4 = phi_v1_3; + phi_a2 = phi_v0 * 0; } diff --git a/tests/end_to_end/loop_with_if/irix-g-out.c b/tests/end_to_end/loop_with_if/irix-g-out.c index d8b75811..708cf915 100644 --- a/tests/end_to_end/loop_with_if/irix-g-out.c +++ b/tests/end_to_end/loop_with_if/irix-g-out.c @@ -4,12 +4,12 @@ s32 test(s32 arg0) { sp4 = 0; if (sp4 < arg0) { do { - goto loop_1; + goto loop_1; } while ((sp4 < arg0) != 0); } return sp4; - // bug: did not emit code for node #2; contents below: - sp4 *= 2; // bug: did not emit code for node #3; contents below: sp4 += 4; + // bug: did not emit code for node #2; contents below: + sp4 *= 2; } diff --git a/tests/end_to_end/loop_with_if/irix-o2-allman-out.c b/tests/end_to_end/loop_with_if/irix-o2-allman-out.c index c1326b30..4c43534c 100644 --- a/tests/end_to_end/loop_with_if/irix-o2-allman-out.c +++ b/tests/end_to_end/loop_with_if/irix-o2-allman-out.c @@ -10,9 +10,9 @@ s32 test(s32 arg0) phi_v1 = 0; do { - goto loop_2; - phi_v1 = phi_v1_2; - phi_v1_3 = phi_v1_2; + goto loop_2; + phi_v1 = phi_v1_2; + phi_v1_3 = phi_v1_2; } while ((phi_v1_2 < arg0) != 0); } return phi_v1_3; diff --git a/tests/end_to_end/loop_with_if/irix-o2-out.c b/tests/end_to_end/loop_with_if/irix-o2-out.c index 27edf989..f61a36ca 100644 --- a/tests/end_to_end/loop_with_if/irix-o2-out.c +++ b/tests/end_to_end/loop_with_if/irix-o2-out.c @@ -7,14 +7,14 @@ s32 test(s32 arg0) { if (arg0 > 0) { phi_v1 = 0; do { - goto loop_2; - phi_v1 = phi_v1_2; - phi_v1_3 = phi_v1_2; + goto loop_2; + phi_v1 = phi_v1_2; + phi_v1_3 = phi_v1_2; } while ((phi_v1_2 < arg0) != 0); } return phi_v1_3; - // bug: did not emit code for node #4; contents below: - phi_v1_2 = phi_v1 + 4; // bug: did not emit code for node #3; contents below: phi_v1_2 = phi_v1 * 2; + // bug: did not emit code for node #4; contents below: + phi_v1_2 = phi_v1 + 4; } From 43790214517b57ff99e1d9425b31a3c9377ceb4c Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 23 Jun 2021 20:45:28 -0700 Subject: [PATCH 34/54] Fix self_loop definition --- src/flow_graph.py | 2 +- src/if_statements.py | 2 + .../end_to_end/andor_assignment/irix-g-out.c | 20 ++++------ .../andor_assignment/irix-o2-noandor-out.c | 24 +++++------- .../end_to_end/andor_assignment/irix-o2-out.c | 24 +++++------- tests/end_to_end/loop/irix-g-out.c | 12 +++--- tests/end_to_end/loop/irix-o2-no-reroll-out.c | 38 +++++++++---------- tests/end_to_end/loop/irix-o2-out.c | 16 ++++---- tests/end_to_end/loop_nested/irix-o2-out.c | 10 ++--- tests/end_to_end/loop_with_if/irix-g-out.c | 4 +- tests/end_to_end/loop_with_if/irix-o2-out.c | 4 +- tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 20 +++++----- .../end_to_end/multiple-assigns/irix-g-out.c | 38 +++++++++---------- .../end_to_end/multiple-assigns/irix-o2-out.c | 36 +++++++++--------- 14 files changed, 113 insertions(+), 137 deletions(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index 2b9ea2fc..644a1a1c 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -957,7 +957,7 @@ class NaturalLoop: backedges: Set[Node] = attr.ib(factory=set) def is_self_loop(self) -> bool: - return False # TODO + return len(self.nodes) == 1 def build_graph_from_block( diff --git a/src/if_statements.py b/src/if_statements.py index 055e3008..837f67d2 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -688,8 +688,10 @@ def build_flowgraph_between( assert not isinstance(curr_start, TerminalNode) if not skip_loop_detection and isinstance(curr_start, ConditionalNode): + # breakpoint() do_while_loop = detect_loop(context, curr_start, end) if do_while_loop: + # breakpoint() body.add_do_while_loop(do_while_loop) # Move past the loop: if curr_start.loop and curr_start.loop.is_self_loop(): diff --git a/tests/end_to_end/andor_assignment/irix-g-out.c b/tests/end_to_end/andor_assignment/irix-g-out.c index 1b7be5dc..b4ee7c12 100644 --- a/tests/end_to_end/andor_assignment/irix-g-out.c +++ b/tests/end_to_end/andor_assignment/irix-g-out.c @@ -22,24 +22,20 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { sp20 = func_00400090(temp_t6); if ((sp20 != 0) && (arg3 != 0)) { if (sp1C < 5) { -loop_13: - sp1C += 1; - sp1C *= 2; - if (sp1C < 5) { - goto loop_13; - } + do { + sp1C += 1; + sp1C *= 2; + } while ((sp1C < 5) != 0); } sp1C += 5; } } if ((sp24 != 0) && (sp20 != 0) && (temp_t9 = sp24 + sp20, sp24 = temp_t9, sp20 = func_00400090(temp_t9), (sp20 != 0)) && (arg3 != 0)) { if (sp1C < 5) { -loop_20: - sp1C += 1; - sp1C *= 2; - if (sp1C < 5) { - goto loop_20; - } + do { + sp1C += 1; + sp1C *= 2; + } while ((sp1C < 5) != 0); } sp1C += 5; } else { diff --git a/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c b/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c index 4c44b2b1..10e69b0b 100644 --- a/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c +++ b/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c @@ -81,13 +81,11 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { phi_v1_2 = temp_v1; phi_v1_6 = temp_v1; if (temp_v1 < 5) { -loop_12: - temp_t5 = (phi_v1_2 + 1) * 2; - phi_v1_2 = temp_t5; - phi_v1_6 = temp_t5; - if (temp_t5 < 5) { - goto loop_12; - } + do { + temp_t5 = (phi_v1_2 + 1) * 2; + phi_v1_2 = temp_t5; + phi_v1_6 = temp_t5; + } while ((temp_t5 < 5) != 0); } phi_t1_2 = temp_v0_2; phi_v1_3 = phi_v1_6 + 5; @@ -105,13 +103,11 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { phi_v1_4 = phi_v1_3; phi_v1_7 = phi_v1_3; if (phi_v1_3 < 5) { -loop_19: - temp_t9 = (phi_v1_4 + 1) * 2; - phi_v1_4 = temp_t9; - phi_v1_7 = temp_t9; - if (temp_t9 < 5) { - goto loop_19; - } + do { + temp_t9 = (phi_v1_4 + 1) * 2; + phi_v1_4 = temp_t9; + phi_v1_7 = temp_t9; + } while ((temp_t9 < 5) != 0); } phi_v1_5 = phi_v1_7 + 5; } else { diff --git a/tests/end_to_end/andor_assignment/irix-o2-out.c b/tests/end_to_end/andor_assignment/irix-o2-out.c index 2d83c39a..1900a53d 100644 --- a/tests/end_to_end/andor_assignment/irix-o2-out.c +++ b/tests/end_to_end/andor_assignment/irix-o2-out.c @@ -64,13 +64,11 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { phi_v1_2 = temp_v1; phi_v1_6 = temp_v1; if (temp_v1 < 5) { -loop_12: - temp_t3 = (phi_v1_2 + 1) * 2; - phi_v1_2 = temp_t3; - phi_v1_6 = temp_t3; - if (temp_t3 < 5) { - goto loop_12; - } + do { + temp_t3 = (phi_v1_2 + 1) * 2; + phi_v1_2 = temp_t3; + phi_v1_6 = temp_t3; + } while ((temp_t3 < 5) != 0); } phi_s0_2 = temp_s0_2; phi_t0_2 = temp_v0_2; @@ -83,13 +81,11 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { phi_v1_4 = phi_v1_3; phi_v1_7 = phi_v1_3; if (phi_v1_3 < 5) { -loop_19: - temp_t5 = (phi_v1_4 + 1) * 2; - phi_v1_4 = temp_t5; - phi_v1_7 = temp_t5; - if (temp_t5 < 5) { - goto loop_19; - } + do { + temp_t5 = (phi_v1_4 + 1) * 2; + phi_v1_4 = temp_t5; + phi_v1_7 = temp_t5; + } while ((temp_t5 < 5) != 0); } phi_v1_5 = phi_v1_7 + 5; } else { diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index f1a7d35b..0fc2ff25 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -4,12 +4,10 @@ void test(s32 arg0, s32 arg1) { sp4 = 0; if (arg1 > 0) { -loop_1: - *(arg0 + sp4) = (u8)0; - temp_t9 = sp4 + 1; - sp4 = temp_t9; - if (temp_t9 < arg1) { - goto loop_1; - } + do { + *(arg0 + sp4) = (u8)0; + temp_t9 = sp4 + 1; + sp4 = temp_t9; + } while ((temp_t9 < arg1) != 0); } } diff --git a/tests/end_to_end/loop/irix-o2-no-reroll-out.c b/tests/end_to_end/loop/irix-o2-no-reroll-out.c index 3c783b8f..684487af 100644 --- a/tests/end_to_end/loop/irix-o2-no-reroll-out.c +++ b/tests/end_to_end/loop/irix-o2-no-reroll-out.c @@ -17,33 +17,29 @@ s32 test(s8 *arg0, s32 arg1) { if (temp_a3 != 0) { phi_v1 = arg0; phi_v0 = 0; -loop_3: - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 += 1; - phi_v0 = temp_v0; - if (temp_a3 != temp_v0) { - goto loop_3; - } + do { + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 += 1; + phi_v0 = temp_v0; + } while (temp_a3 != temp_v0); phi_return = temp_v0; phi_v0_3 = temp_v0; if (temp_v0 != arg1) { block_5: phi_v1_2 = arg0 + phi_v0_3; phi_v0_2 = phi_v0_3; -loop_6: - temp_v0_2 = phi_v0_2 + 4; - phi_v1_2->unk1 = (u8)0; - phi_v1_2->unk2 = (u8)0; - phi_v1_2->unk3 = (u8)0; - temp_v1 = phi_v1_2 + 4; - temp_v1->unk-4 = (u8)0; - phi_v1_2 = temp_v1; - phi_v0_2 = temp_v0_2; - phi_return = temp_v0_2; - if (temp_v0_2 != arg1) { - goto loop_6; - } + do { + temp_v0_2 = phi_v0_2 + 4; + phi_v1_2->unk1 = (u8)0; + phi_v1_2->unk2 = (u8)0; + phi_v1_2->unk3 = (u8)0; + temp_v1 = phi_v1_2 + 4; + temp_v1->unk-4 = (u8)0; + phi_v1_2 = temp_v1; + phi_v0_2 = temp_v0_2; + phi_return = temp_v0_2; + } while (temp_v0_2 != arg1); } } else { goto block_5; diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index 0786eeb7..ab71fb23 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -8,15 +8,13 @@ s32 test(s8 *arg0, s32 arg1) { if (arg1 > 0) { phi_v1 = arg0; phi_v0 = 0; -loop_3: - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 += 1; - phi_v0 = temp_v0; - phi_return = temp_v0; - if (arg1 != temp_v0) { - goto loop_3; - } + do { + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 += 1; + phi_v0 = temp_v0; + phi_return = temp_v0; + } while (arg1 != temp_v0); } return phi_return; } diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index 67e42164..a4f451c7 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -22,15 +22,15 @@ s32 test(s32 arg0) { } while (temp_v0 != arg0); } return phi_v1; + // bug: did not emit code for node #2; contents below: + // bug: did not emit code for node #3; contents below: + phi_a3 = 1; + phi_v1_4 = phi_v1_3; + phi_a2 = phi_v0 * 0; // bug: did not emit code for node #4; contents below: temp_v1 = phi_v1_4 + phi_a2; phi_a3 += 1; phi_v1_2 = temp_v1; phi_v1_4 = temp_v1; phi_a2 += phi_v0; - // bug: did not emit code for node #2; contents below: - // bug: did not emit code for node #3; contents below: - phi_a3 = 1; - phi_v1_4 = phi_v1_3; - phi_a2 = phi_v0 * 0; } diff --git a/tests/end_to_end/loop_with_if/irix-g-out.c b/tests/end_to_end/loop_with_if/irix-g-out.c index 708cf915..5ff1c68c 100644 --- a/tests/end_to_end/loop_with_if/irix-g-out.c +++ b/tests/end_to_end/loop_with_if/irix-g-out.c @@ -8,8 +8,8 @@ s32 test(s32 arg0) { } while ((sp4 < arg0) != 0); } return sp4; - // bug: did not emit code for node #3; contents below: - sp4 += 4; // bug: did not emit code for node #2; contents below: sp4 *= 2; + // bug: did not emit code for node #3; contents below: + sp4 += 4; } diff --git a/tests/end_to_end/loop_with_if/irix-o2-out.c b/tests/end_to_end/loop_with_if/irix-o2-out.c index f61a36ca..3637acb2 100644 --- a/tests/end_to_end/loop_with_if/irix-o2-out.c +++ b/tests/end_to_end/loop_with_if/irix-o2-out.c @@ -13,8 +13,8 @@ s32 test(s32 arg0) { } while ((phi_v1_2 < arg0) != 0); } return phi_v1_3; - // bug: did not emit code for node #3; contents below: - phi_v1_2 = phi_v1 * 2; // bug: did not emit code for node #4; contents below: phi_v1_2 = phi_v1 + 4; + // bug: did not emit code for node #3; contents below: + phi_v1_2 = phi_v1 * 2; } diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index 792a6c50..6c291d44 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -13,17 +13,15 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) { phi_v0 = temp_v0; phi_a0 = (arg2 * 4) + &D_8015F668; phi_v1 = 0; -loop_3: - *phi_a0 = phi_v0; - temp_v1 = phi_v1 + 1; - temp_v0_2 = phi_v0 + 0x10; - phi_v0 = temp_v0_2; - phi_a0 += 4; - phi_v1 = temp_v1; - phi_return = temp_v0_2; - if (arg1 != temp_v1) { - goto loop_3; - } + do { + *phi_a0 = phi_v0; + temp_v1 = phi_v1 + 1; + temp_v0_2 = phi_v0 + 0x10; + phi_v0 = temp_v0_2; + phi_a0 += 4; + phi_v1 = temp_v1; + phi_return = temp_v0_2; + } while (arg1 != temp_v1); } return phi_return; } diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index 5fdb4e39..23349294 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -10,26 +10,24 @@ s32 test(s32 arg0) { phi_a0 = arg0; if (arg0 == 5) { -loop_1: - D_410150 = phi_a0; - temp_a0 = phi_a0 + 1; - D_410150 = temp_a0; - temp_a0_2 = temp_a0 + 1; - D_410150 = temp_a0_2; - temp_a0_3 = temp_a0_2 + 1; - D_410150 = temp_a0_3; - sp4 = temp_a0_3; - temp_a0_4 = temp_a0_3 + 1; - D_410150 = temp_a0_4; - D_410150 = temp_a0_4; - temp_a0_5 = temp_a0_4 + 1; - D_410150 = temp_a0_5; - temp_a0_6 = temp_a0_5 + 1; - D_410150 = sp4; - phi_a0 = temp_a0_6; - if (temp_a0_6 == 5) { - goto loop_1; - } + do { + D_410150 = phi_a0; + temp_a0 = phi_a0 + 1; + D_410150 = temp_a0; + temp_a0_2 = temp_a0 + 1; + D_410150 = temp_a0_2; + temp_a0_3 = temp_a0_2 + 1; + D_410150 = temp_a0_3; + sp4 = temp_a0_3; + temp_a0_4 = temp_a0_3 + 1; + D_410150 = temp_a0_4; + D_410150 = temp_a0_4; + temp_a0_5 = temp_a0_4 + 1; + D_410150 = temp_a0_5; + temp_a0_6 = temp_a0_5 + 1; + D_410150 = sp4; + phi_a0 = temp_a0_6; + } while (temp_a0_6 == 5); } return sp4; } diff --git a/tests/end_to_end/multiple-assigns/irix-o2-out.c b/tests/end_to_end/multiple-assigns/irix-o2-out.c index 8170f400..88957ba3 100644 --- a/tests/end_to_end/multiple-assigns/irix-o2-out.c +++ b/tests/end_to_end/multiple-assigns/irix-o2-out.c @@ -10,25 +10,23 @@ s32 test(s32 arg0) { if (arg0 == 5) { phi_a0 = arg0; -loop_2: - D_410120 = phi_a0; - temp_a0 = phi_a0 + 1; - D_410120 = temp_a0; - temp_a0_2 = temp_a0 + 1; - D_410120 = temp_a0_2; - temp_a0_3 = temp_a0_2 + 1; - D_410120 = temp_a0_3; - temp_a0_4 = temp_a0_3 + 1; - D_410120 = temp_a0_4; - D_410120 = temp_a0_4; - temp_a0_5 = temp_a0_4 + 1; - D_410120 = temp_a0_5; - temp_a0_6 = temp_a0_5 + 1; - D_410120 = temp_a0_3; - phi_a0 = temp_a0_6; - if (temp_a0_6 == 5) { - goto loop_2; - } + do { + D_410120 = phi_a0; + temp_a0 = phi_a0 + 1; + D_410120 = temp_a0; + temp_a0_2 = temp_a0 + 1; + D_410120 = temp_a0_2; + temp_a0_3 = temp_a0_2 + 1; + D_410120 = temp_a0_3; + temp_a0_4 = temp_a0_3 + 1; + D_410120 = temp_a0_4; + D_410120 = temp_a0_4; + temp_a0_5 = temp_a0_4 + 1; + D_410120 = temp_a0_5; + temp_a0_6 = temp_a0_5 + 1; + D_410120 = temp_a0_3; + phi_a0 = temp_a0_6; + } while (temp_a0_6 == 5); sp4 = temp_a0_3; } return sp4; From a2ed466a68420aec42443ecc506107bb9998b75b Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 23 Jun 2021 20:51:22 -0700 Subject: [PATCH 35/54] Minor fixups --- src/flow_graph.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index 644a1a1c..3b539b99 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -797,9 +797,6 @@ def children(self) -> List["Node"]: def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: ... - def name(self) -> str: - return str(self.block.index) - @attr.s(eq=False) class BasicNode(BaseNode): @@ -863,7 +860,7 @@ def children(self) -> List["Node"]: return [self.terminal] def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: - pass + return None def name(self) -> str: name = super().name() From fb23b5dbb381bac65ce3fa36f1b6633ee4b64413 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 23 Jun 2021 21:15:47 -0700 Subject: [PATCH 36/54] Fix bug where emitting a node kills that node forever --- src/if_statements.py | 8 +++++--- .../end_to_end/andor_assignment/irix-g-out.c | 6 ++++++ .../andor_assignment/irix-o2-noandor-out.c | 8 ++++++++ .../end_to_end/andor_assignment/irix-o2-out.c | 8 ++++++++ tests/end_to_end/loop/irix-g-out.c | 4 ++++ tests/end_to_end/loop/irix-o2-no-reroll-out.c | 15 +++++++++++++++ tests/end_to_end/loop/irix-o2-out.c | 6 ++++++ tests/end_to_end/loop_nested/irix-g-out.c | 8 +++++++- tests/end_to_end/loop_nested/irix-o2-out.c | 19 +++++++++++++------ tests/end_to_end/loop_with_if/irix-g-out.c | 10 +++++----- .../loop_with_if/irix-o2-allman-out.c | 13 ++++++++----- tests/end_to_end/loop_with_if/irix-o2-out.c | 10 +++++----- tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 8 ++++++++ .../end_to_end/multiple-assigns/irix-g-out.c | 17 +++++++++++++++++ .../end_to_end/multiple-assigns/irix-o2-out.c | 16 ++++++++++++++++ 15 files changed, 131 insertions(+), 25 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 837f67d2..1070a113 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -294,7 +294,7 @@ def label_for_node(context: Context, node: Node) -> str: return f"block_{node.block.index}" -def emit_node(context: Context, node: Node, body: Body) -> bool: +def emit_node(context: Context, node: Node, body: Body, secretly: bool = False) -> bool: """ Try to emit a node for the first time, together with a label for it. The label is only printed if something jumps to it, e.g. a loop. @@ -320,7 +320,8 @@ def emit_node(context: Context, node: Node, body: Body) -> bool: ) else: body.add_statement(LabelStatement(context, node)) - context.emitted_nodes.add(node) + if not secretly: + context.emitted_nodes.add(node) body.add_node(node, comment_empty=True) if isinstance(node, ReturnNode): @@ -634,6 +635,7 @@ def detect_loop( context: Context, start: ConditionalNode, end: Node ) -> Optional[DoWhileLoop]: # We detect edges that are accompanied by their reverse as loops. + # breakpoint() imm_pdom: Node if start.loop and start.loop.is_self_loop(): imm_pdom = start @@ -648,7 +650,7 @@ def detect_loop( return None loop_body = Body(False, []) - emit_node(context, start, loop_body) + emit_node(context, start, loop_body, secretly=True) if not start.loop or not start.loop.is_self_loop(): # There are more nodes to emit, "between" the start node diff --git a/tests/end_to_end/andor_assignment/irix-g-out.c b/tests/end_to_end/andor_assignment/irix-g-out.c index b4ee7c12..82691d2f 100644 --- a/tests/end_to_end/andor_assignment/irix-g-out.c +++ b/tests/end_to_end/andor_assignment/irix-g-out.c @@ -42,4 +42,10 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { sp1C += 6; } return sp1C; + // bug: did not emit code for node #13; contents below: + sp1C += 1; + sp1C *= 2; + // bug: did not emit code for node #20; contents below: + sp1C += 1; + sp1C *= 2; } diff --git a/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c b/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c index 10e69b0b..814e7f65 100644 --- a/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c +++ b/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c @@ -124,4 +124,12 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { goto block_21; } return phi_v1_5; + // bug: did not emit code for node #19; contents below: + temp_t9 = (phi_v1_4 + 1) * 2; + phi_v1_4 = temp_t9; + phi_v1_7 = temp_t9; + // bug: did not emit code for node #12; contents below: + temp_t5 = (phi_v1_2 + 1) * 2; + phi_v1_2 = temp_t5; + phi_v1_6 = temp_t5; } diff --git a/tests/end_to_end/andor_assignment/irix-o2-out.c b/tests/end_to_end/andor_assignment/irix-o2-out.c index 1900a53d..2c1a921f 100644 --- a/tests/end_to_end/andor_assignment/irix-o2-out.c +++ b/tests/end_to_end/andor_assignment/irix-o2-out.c @@ -92,4 +92,12 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { phi_v1_5 = phi_v1_3 + 6; } return phi_v1_5; + // bug: did not emit code for node #19; contents below: + temp_t5 = (phi_v1_4 + 1) * 2; + phi_v1_4 = temp_t5; + phi_v1_7 = temp_t5; + // bug: did not emit code for node #12; contents below: + temp_t3 = (phi_v1_2 + 1) * 2; + phi_v1_2 = temp_t3; + phi_v1_6 = temp_t3; } diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index 0fc2ff25..f1da3d72 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -10,4 +10,8 @@ void test(s32 arg0, s32 arg1) { sp4 = temp_t9; } while ((temp_t9 < arg1) != 0); } + // bug: did not emit code for node #1; contents below: + *(arg0 + sp4) = (u8)0; + temp_t9 = sp4 + 1; + sp4 = temp_t9; } diff --git a/tests/end_to_end/loop/irix-o2-no-reroll-out.c b/tests/end_to_end/loop/irix-o2-no-reroll-out.c index 684487af..8d1303dd 100644 --- a/tests/end_to_end/loop/irix-o2-no-reroll-out.c +++ b/tests/end_to_end/loop/irix-o2-no-reroll-out.c @@ -46,4 +46,19 @@ s32 test(s8 *arg0, s32 arg1) { } } return phi_return; + // bug: did not emit code for node #6; contents below: + temp_v0_2 = phi_v0_2 + 4; + phi_v1_2->unk1 = (u8)0; + phi_v1_2->unk2 = (u8)0; + phi_v1_2->unk3 = (u8)0; + temp_v1 = phi_v1_2 + 4; + temp_v1->unk-4 = (u8)0; + phi_v1_2 = temp_v1; + phi_v0_2 = temp_v0_2; + phi_return = temp_v0_2; + // bug: did not emit code for node #3; contents below: + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 += 1; + phi_v0 = temp_v0; } diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index ab71fb23..72003b07 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -17,4 +17,10 @@ s32 test(s8 *arg0, s32 arg1) { } while (arg1 != temp_v0); } return phi_return; + // bug: did not emit code for node #3; contents below: + temp_v0 = phi_v0 + 1; + *phi_v1 = (u8)0; + phi_v1 += 1; + phi_v0 = temp_v0; + phi_return = temp_v0; } diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 90fc6dbb..761970fc 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -7,7 +7,13 @@ s32 test(s32 arg0) { sp8 = 0; if (spC < arg0) { do { - goto loop_1; + sp4 = 0; + if (sp4 < arg0) { + do { + sp8 += spC * sp4; + sp4 += 1; + } while ((sp4 < arg0) != 0); + } spC += 1; } while ((spC < arg0) != 0); } diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index a4f451c7..65d04b3a 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -14,7 +14,19 @@ s32 test(s32 arg0) { phi_v1_3 = 0; if (arg0 > 0) { do { - goto loop_1; + phi_v1_2 = phi_v1_3; + if (arg0 > 0) { + phi_a3 = 1; + phi_v1_4 = phi_v1_3; + phi_a2 = phi_v0 * 0; + do { + temp_v1 = phi_v1_4 + phi_a2; + phi_a3 += 1; + phi_v1_2 = temp_v1; + phi_v1_4 = temp_v1; + phi_a2 += phi_v0; + } while (arg0 != phi_a3); + } temp_v0 = phi_v0 + 1; phi_v0 = temp_v0; phi_v1 = phi_v1_2; @@ -22,11 +34,6 @@ s32 test(s32 arg0) { } while (temp_v0 != arg0); } return phi_v1; - // bug: did not emit code for node #2; contents below: - // bug: did not emit code for node #3; contents below: - phi_a3 = 1; - phi_v1_4 = phi_v1_3; - phi_a2 = phi_v0 * 0; // bug: did not emit code for node #4; contents below: temp_v1 = phi_v1_4 + phi_a2; phi_a3 += 1; diff --git a/tests/end_to_end/loop_with_if/irix-g-out.c b/tests/end_to_end/loop_with_if/irix-g-out.c index 5ff1c68c..60a2e188 100644 --- a/tests/end_to_end/loop_with_if/irix-g-out.c +++ b/tests/end_to_end/loop_with_if/irix-g-out.c @@ -4,12 +4,12 @@ s32 test(s32 arg0) { sp4 = 0; if (sp4 < arg0) { do { - goto loop_1; + if (sp4 == 5) { + sp4 *= 2; + } else { + sp4 += 4; + } } while ((sp4 < arg0) != 0); } return sp4; - // bug: did not emit code for node #2; contents below: - sp4 *= 2; - // bug: did not emit code for node #3; contents below: - sp4 += 4; } diff --git a/tests/end_to_end/loop_with_if/irix-o2-allman-out.c b/tests/end_to_end/loop_with_if/irix-o2-allman-out.c index 4c43534c..326b5284 100644 --- a/tests/end_to_end/loop_with_if/irix-o2-allman-out.c +++ b/tests/end_to_end/loop_with_if/irix-o2-allman-out.c @@ -10,14 +10,17 @@ s32 test(s32 arg0) phi_v1 = 0; do { - goto loop_2; + if (phi_v1 == 5) + { + phi_v1_2 = phi_v1 * 2; + } + else + { + phi_v1_2 = phi_v1 + 4; + } phi_v1 = phi_v1_2; phi_v1_3 = phi_v1_2; } while ((phi_v1_2 < arg0) != 0); } return phi_v1_3; - // bug: did not emit code for node #4; contents below: - phi_v1_2 = phi_v1 + 4; - // bug: did not emit code for node #3; contents below: - phi_v1_2 = phi_v1 * 2; } diff --git a/tests/end_to_end/loop_with_if/irix-o2-out.c b/tests/end_to_end/loop_with_if/irix-o2-out.c index 3637acb2..cb147c9d 100644 --- a/tests/end_to_end/loop_with_if/irix-o2-out.c +++ b/tests/end_to_end/loop_with_if/irix-o2-out.c @@ -7,14 +7,14 @@ s32 test(s32 arg0) { if (arg0 > 0) { phi_v1 = 0; do { - goto loop_2; + if (phi_v1 == 5) { + phi_v1_2 = phi_v1 * 2; + } else { + phi_v1_2 = phi_v1 + 4; + } phi_v1 = phi_v1_2; phi_v1_3 = phi_v1_2; } while ((phi_v1_2 < arg0) != 0); } return phi_v1_3; - // bug: did not emit code for node #4; contents below: - phi_v1_2 = phi_v1 + 4; - // bug: did not emit code for node #3; contents below: - phi_v1_2 = phi_v1 * 2; } diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index 6c291d44..a92a5c61 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -24,4 +24,12 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) { } while (arg1 != temp_v1); } return phi_return; + // bug: did not emit code for node #3; contents below: + *phi_a0 = phi_v0; + temp_v1 = phi_v1 + 1; + temp_v0_2 = phi_v0 + 0x10; + phi_v0 = temp_v0_2; + phi_a0 += 4; + phi_v1 = temp_v1; + phi_return = temp_v0_2; } diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index 23349294..7d823e6d 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -30,4 +30,21 @@ s32 test(s32 arg0) { } while (temp_a0_6 == 5); } return sp4; + // bug: did not emit code for node #1; contents below: + D_410150 = phi_a0; + temp_a0 = phi_a0 + 1; + D_410150 = temp_a0; + temp_a0_2 = temp_a0 + 1; + D_410150 = temp_a0_2; + temp_a0_3 = temp_a0_2 + 1; + D_410150 = temp_a0_3; + sp4 = temp_a0_3; + temp_a0_4 = temp_a0_3 + 1; + D_410150 = temp_a0_4; + D_410150 = temp_a0_4; + temp_a0_5 = temp_a0_4 + 1; + D_410150 = temp_a0_5; + temp_a0_6 = temp_a0_5 + 1; + D_410150 = sp4; + phi_a0 = temp_a0_6; } diff --git a/tests/end_to_end/multiple-assigns/irix-o2-out.c b/tests/end_to_end/multiple-assigns/irix-o2-out.c index 88957ba3..5be2b1e3 100644 --- a/tests/end_to_end/multiple-assigns/irix-o2-out.c +++ b/tests/end_to_end/multiple-assigns/irix-o2-out.c @@ -30,4 +30,20 @@ s32 test(s32 arg0) { sp4 = temp_a0_3; } return sp4; + // bug: did not emit code for node #2; contents below: + D_410120 = phi_a0; + temp_a0 = phi_a0 + 1; + D_410120 = temp_a0; + temp_a0_2 = temp_a0 + 1; + D_410120 = temp_a0_2; + temp_a0_3 = temp_a0_2 + 1; + D_410120 = temp_a0_3; + temp_a0_4 = temp_a0_3 + 1; + D_410120 = temp_a0_4; + D_410120 = temp_a0_4; + temp_a0_5 = temp_a0_4 + 1; + D_410120 = temp_a0_5; + temp_a0_6 = temp_a0_5 + 1; + D_410120 = temp_a0_3; + phi_a0 = temp_a0_6; } From 774d4670728f1b88b30181489519156ea40b4e13 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 23 Jun 2021 21:22:13 -0700 Subject: [PATCH 37/54] Fix bug where nodes were not thought to be emitted --- src/if_statements.py | 7 ++++++- tests/end_to_end/andor_assignment/irix-g-out.c | 6 ------ .../andor_assignment/irix-o2-noandor-out.c | 8 -------- tests/end_to_end/andor_assignment/irix-o2-out.c | 8 -------- tests/end_to_end/loop/irix-g-out.c | 4 ---- tests/end_to_end/loop/irix-o2-no-reroll-out.c | 15 --------------- tests/end_to_end/loop/irix-o2-out.c | 6 ------ tests/end_to_end/loop_nested/irix-g-out.c | 3 --- tests/end_to_end/loop_nested/irix-o2-out.c | 6 ------ tests/end_to_end/mk64_unknown_1/irix-o2-out.c | 8 -------- tests/end_to_end/multiple-assigns/irix-g-out.c | 17 ----------------- tests/end_to_end/multiple-assigns/irix-o2-out.c | 16 ---------------- 12 files changed, 6 insertions(+), 98 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 1070a113..3ae650f9 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -650,7 +650,12 @@ def detect_loop( return None loop_body = Body(False, []) - emit_node(context, start, loop_body, secretly=True) + emit_node( + context, + start, + loop_body, + secretly=(bool(start.loop and not start.loop.is_self_loop())), + ) if not start.loop or not start.loop.is_self_loop(): # There are more nodes to emit, "between" the start node diff --git a/tests/end_to_end/andor_assignment/irix-g-out.c b/tests/end_to_end/andor_assignment/irix-g-out.c index 82691d2f..b4ee7c12 100644 --- a/tests/end_to_end/andor_assignment/irix-g-out.c +++ b/tests/end_to_end/andor_assignment/irix-g-out.c @@ -42,10 +42,4 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { sp1C += 6; } return sp1C; - // bug: did not emit code for node #13; contents below: - sp1C += 1; - sp1C *= 2; - // bug: did not emit code for node #20; contents below: - sp1C += 1; - sp1C *= 2; } diff --git a/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c b/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c index 814e7f65..10e69b0b 100644 --- a/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c +++ b/tests/end_to_end/andor_assignment/irix-o2-noandor-out.c @@ -124,12 +124,4 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { goto block_21; } return phi_v1_5; - // bug: did not emit code for node #19; contents below: - temp_t9 = (phi_v1_4 + 1) * 2; - phi_v1_4 = temp_t9; - phi_v1_7 = temp_t9; - // bug: did not emit code for node #12; contents below: - temp_t5 = (phi_v1_2 + 1) * 2; - phi_v1_2 = temp_t5; - phi_v1_6 = temp_t5; } diff --git a/tests/end_to_end/andor_assignment/irix-o2-out.c b/tests/end_to_end/andor_assignment/irix-o2-out.c index 2c1a921f..1900a53d 100644 --- a/tests/end_to_end/andor_assignment/irix-o2-out.c +++ b/tests/end_to_end/andor_assignment/irix-o2-out.c @@ -92,12 +92,4 @@ s32 test(s32 arg0, s32 arg1, s32 arg2, s32 arg3) { phi_v1_5 = phi_v1_3 + 6; } return phi_v1_5; - // bug: did not emit code for node #19; contents below: - temp_t5 = (phi_v1_4 + 1) * 2; - phi_v1_4 = temp_t5; - phi_v1_7 = temp_t5; - // bug: did not emit code for node #12; contents below: - temp_t3 = (phi_v1_2 + 1) * 2; - phi_v1_2 = temp_t3; - phi_v1_6 = temp_t3; } diff --git a/tests/end_to_end/loop/irix-g-out.c b/tests/end_to_end/loop/irix-g-out.c index f1da3d72..0fc2ff25 100644 --- a/tests/end_to_end/loop/irix-g-out.c +++ b/tests/end_to_end/loop/irix-g-out.c @@ -10,8 +10,4 @@ void test(s32 arg0, s32 arg1) { sp4 = temp_t9; } while ((temp_t9 < arg1) != 0); } - // bug: did not emit code for node #1; contents below: - *(arg0 + sp4) = (u8)0; - temp_t9 = sp4 + 1; - sp4 = temp_t9; } diff --git a/tests/end_to_end/loop/irix-o2-no-reroll-out.c b/tests/end_to_end/loop/irix-o2-no-reroll-out.c index 8d1303dd..684487af 100644 --- a/tests/end_to_end/loop/irix-o2-no-reroll-out.c +++ b/tests/end_to_end/loop/irix-o2-no-reroll-out.c @@ -46,19 +46,4 @@ s32 test(s8 *arg0, s32 arg1) { } } return phi_return; - // bug: did not emit code for node #6; contents below: - temp_v0_2 = phi_v0_2 + 4; - phi_v1_2->unk1 = (u8)0; - phi_v1_2->unk2 = (u8)0; - phi_v1_2->unk3 = (u8)0; - temp_v1 = phi_v1_2 + 4; - temp_v1->unk-4 = (u8)0; - phi_v1_2 = temp_v1; - phi_v0_2 = temp_v0_2; - phi_return = temp_v0_2; - // bug: did not emit code for node #3; contents below: - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 += 1; - phi_v0 = temp_v0; } diff --git a/tests/end_to_end/loop/irix-o2-out.c b/tests/end_to_end/loop/irix-o2-out.c index 72003b07..ab71fb23 100644 --- a/tests/end_to_end/loop/irix-o2-out.c +++ b/tests/end_to_end/loop/irix-o2-out.c @@ -17,10 +17,4 @@ s32 test(s8 *arg0, s32 arg1) { } while (arg1 != temp_v0); } return phi_return; - // bug: did not emit code for node #3; contents below: - temp_v0 = phi_v0 + 1; - *phi_v1 = (u8)0; - phi_v1 += 1; - phi_v0 = temp_v0; - phi_return = temp_v0; } diff --git a/tests/end_to_end/loop_nested/irix-g-out.c b/tests/end_to_end/loop_nested/irix-g-out.c index 761970fc..25080ba1 100644 --- a/tests/end_to_end/loop_nested/irix-g-out.c +++ b/tests/end_to_end/loop_nested/irix-g-out.c @@ -18,7 +18,4 @@ s32 test(s32 arg0) { } while ((spC < arg0) != 0); } return sp8; - // bug: did not emit code for node #2; contents below: - sp8 += spC * sp4; - sp4 += 1; } diff --git a/tests/end_to_end/loop_nested/irix-o2-out.c b/tests/end_to_end/loop_nested/irix-o2-out.c index 65d04b3a..7aca39c8 100644 --- a/tests/end_to_end/loop_nested/irix-o2-out.c +++ b/tests/end_to_end/loop_nested/irix-o2-out.c @@ -34,10 +34,4 @@ s32 test(s32 arg0) { } while (temp_v0 != arg0); } return phi_v1; - // bug: did not emit code for node #4; contents below: - temp_v1 = phi_v1_4 + phi_a2; - phi_a3 += 1; - phi_v1_2 = temp_v1; - phi_v1_4 = temp_v1; - phi_a2 += phi_v0; } diff --git a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c index a92a5c61..6c291d44 100644 --- a/tests/end_to_end/mk64_unknown_1/irix-o2-out.c +++ b/tests/end_to_end/mk64_unknown_1/irix-o2-out.c @@ -24,12 +24,4 @@ s32 test(u32 arg0, s32 arg1, s32 arg2) { } while (arg1 != temp_v1); } return phi_return; - // bug: did not emit code for node #3; contents below: - *phi_a0 = phi_v0; - temp_v1 = phi_v1 + 1; - temp_v0_2 = phi_v0 + 0x10; - phi_v0 = temp_v0_2; - phi_a0 += 4; - phi_v1 = temp_v1; - phi_return = temp_v0_2; } diff --git a/tests/end_to_end/multiple-assigns/irix-g-out.c b/tests/end_to_end/multiple-assigns/irix-g-out.c index 7d823e6d..23349294 100644 --- a/tests/end_to_end/multiple-assigns/irix-g-out.c +++ b/tests/end_to_end/multiple-assigns/irix-g-out.c @@ -30,21 +30,4 @@ s32 test(s32 arg0) { } while (temp_a0_6 == 5); } return sp4; - // bug: did not emit code for node #1; contents below: - D_410150 = phi_a0; - temp_a0 = phi_a0 + 1; - D_410150 = temp_a0; - temp_a0_2 = temp_a0 + 1; - D_410150 = temp_a0_2; - temp_a0_3 = temp_a0_2 + 1; - D_410150 = temp_a0_3; - sp4 = temp_a0_3; - temp_a0_4 = temp_a0_3 + 1; - D_410150 = temp_a0_4; - D_410150 = temp_a0_4; - temp_a0_5 = temp_a0_4 + 1; - D_410150 = temp_a0_5; - temp_a0_6 = temp_a0_5 + 1; - D_410150 = sp4; - phi_a0 = temp_a0_6; } diff --git a/tests/end_to_end/multiple-assigns/irix-o2-out.c b/tests/end_to_end/multiple-assigns/irix-o2-out.c index 5be2b1e3..88957ba3 100644 --- a/tests/end_to_end/multiple-assigns/irix-o2-out.c +++ b/tests/end_to_end/multiple-assigns/irix-o2-out.c @@ -30,20 +30,4 @@ s32 test(s32 arg0) { sp4 = temp_a0_3; } return sp4; - // bug: did not emit code for node #2; contents below: - D_410120 = phi_a0; - temp_a0 = phi_a0 + 1; - D_410120 = temp_a0; - temp_a0_2 = temp_a0 + 1; - D_410120 = temp_a0_2; - temp_a0_3 = temp_a0_2 + 1; - D_410120 = temp_a0_3; - temp_a0_4 = temp_a0_3 + 1; - D_410120 = temp_a0_4; - D_410120 = temp_a0_4; - temp_a0_5 = temp_a0_4 + 1; - D_410120 = temp_a0_5; - temp_a0_6 = temp_a0_5 + 1; - D_410120 = temp_a0_3; - phi_a0 = temp_a0_6; } From 914479e4a63d8239b0fe6407ffbe1967a69a572a Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 23 Jun 2021 21:30:11 -0700 Subject: [PATCH 38/54] Fix mypy issue --- src/flow_graph.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index 3b539b99..68097f8e 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -888,7 +888,6 @@ def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: for case in self.cases: if case is replace_this: new_cases.append(with_this) - with_this.add_parent(self) else: new_cases.append(case) self.cases = new_cases From 75a6c192d990d2274cf3a73609f31e6f8ab74882 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Thu, 24 Jun 2021 21:10:30 -0700 Subject: [PATCH 39/54] Remove secretly=... --- src/if_statements.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/if_statements.py b/src/if_statements.py index 6f2f099e..3e209ea0 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -291,7 +291,7 @@ def label_for_node(context: Context, node: Node) -> str: return f"block_{node.block.index}" -def emit_node(context: Context, node: Node, body: Body, secretly: bool = False) -> bool: +def emit_node(context: Context, node: Node, body: Body) -> bool: """ Try to emit a node for the first time, together with a label for it. The label is only printed if something jumps to it, e.g. a loop. @@ -317,8 +317,6 @@ def emit_node(context: Context, node: Node, body: Body, secretly: bool = False) ) else: body.add_statement(LabelStatement(context, node)) - if not secretly: - context.emitted_nodes.add(node) body.add_node(node, comment_empty=True) if isinstance(node, ReturnNode): @@ -695,7 +693,6 @@ def build_flowgraph_between( # Construct the do-while loop do_while_loop = detect_loop(context, curr_start, imm_pdom) if do_while_loop: - # breakpoint() body.add_do_while_loop(do_while_loop) # Move on. From 5849d594aac4daa85a63b4e1d192941b3f413356 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Thu, 24 Jun 2021 21:12:59 -0700 Subject: [PATCH 40/54] Remove secretly=... but correctly --- src/if_statements.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/if_statements.py b/src/if_statements.py index 3e209ea0..471793bd 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -291,7 +291,7 @@ def label_for_node(context: Context, node: Node) -> str: return f"block_{node.block.index}" -def emit_node(context: Context, node: Node, body: Body) -> bool: +def emit_node(context: Context, node: Node, body: Body, secretly: bool = False) -> bool: """ Try to emit a node for the first time, together with a label for it. The label is only printed if something jumps to it, e.g. a loop. @@ -317,6 +317,7 @@ def emit_node(context: Context, node: Node, body: Body) -> bool: ) else: body.add_statement(LabelStatement(context, node)) + context.emitted_nodes.add(node) body.add_node(node, comment_empty=True) if isinstance(node, ReturnNode): From 4fdaadcf482fc4adb0d788e0ed14087786613070 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Thu, 24 Jun 2021 21:27:51 -0700 Subject: [PATCH 41/54] Remove secretly=... but correctly... again --- src/if_statements.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/if_statements.py b/src/if_statements.py index 471793bd..fd33de0d 100644 --- a/src/if_statements.py +++ b/src/if_statements.py @@ -291,7 +291,7 @@ def label_for_node(context: Context, node: Node) -> str: return f"block_{node.block.index}" -def emit_node(context: Context, node: Node, body: Body, secretly: bool = False) -> bool: +def emit_node(context: Context, node: Node, body: Body) -> bool: """ Try to emit a node for the first time, together with a label for it. The label is only printed if something jumps to it, e.g. a loop. From 25e798fb0f8e13101a5d18c9b9c10c2e2f21c0de Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 15:56:39 -0700 Subject: [PATCH 42/54] Remove old wrong comments --- src/loop_rerolling.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index d466eb1b..ebf8703d 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -88,10 +88,8 @@ def modify_node_1_instructions(instructions: List[Instruction]) -> bool: if not modify_node_1_instructions(node_1.block.instructions): return False - new_node_1 = node_1.to_basic_node( - successor=node_2 # node_2 doesn't know it's a parent yet - ) - replace_node(flow_graph, node_1, new_node_1) # now it does + new_node_1 = node_1.to_basic_node(successor=node_2) + replace_node(flow_graph, node_1, new_node_1) remove_node(flow_graph, node_4, node_7) remove_node(flow_graph, node_5, node_7) remove_node(flow_graph, node_6, node_7) # TODO: assert didn't execute anything?. From 2f9dcd0fd82ff9fe8edcbf0cb3218bcab05647a0 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 15:57:55 -0700 Subject: [PATCH 43/54] Undo the translate.py copy import --- src/translate.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/translate.py b/src/translate.py index 1b319e53..f33bc32b 100644 --- a/src/translate.py +++ b/src/translate.py @@ -4,7 +4,6 @@ import sys import traceback from contextlib import contextmanager -from copy import copy from typing import Any, Callable, Dict, Iterator, List, Optional, Set, Tuple, Union import attr @@ -2982,7 +2981,7 @@ def regs_clobbered_until_dominator( if node.immediate_dominator is None: return set() seen = {node.immediate_dominator} - stack = copy(node.parents) + stack = node.parents[:] clobbered = set() while stack: n = stack.pop() @@ -3004,7 +3003,7 @@ def reg_always_set( if node.immediate_dominator is None: return False seen = {node.immediate_dominator} - stack = copy(node.parents) + stack = node.parents[:] while stack: n = stack.pop() if n == node.immediate_dominator and not dom_set: From 66083c3f7d14d20b476e7dc2617ba1960f09d138 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 15:59:03 -0700 Subject: [PATCH 44/54] Undo all translate.py 'fixes' --- src/translate.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/src/translate.py b/src/translate.py index f33bc32b..b81236ff 100644 --- a/src/translate.py +++ b/src/translate.py @@ -17,7 +17,6 @@ ReturnNode, SwitchNode, TerminalNode, - build_flowgraph, ) from .options import Formatter, Options from .parse_file import AsmData, AsmDataEntry @@ -3032,17 +3031,11 @@ def assign_phis(used_phis: List[PhiExpr], stack_info: StackInfo) -> None: while i < len(used_phis): phi = used_phis[i] assert phi.num_usages > 0 - # assert len(phi.node.parents) >= 2 - if len(phi.node.parents) < 2: - i += 1 - continue + assert len(phi.node.parents) >= 2 exprs = [] for node in phi.node.parents: block_info = node.block.block_info - # assert isinstance(block_info, BlockInfo) - if not block_info: - i += 1 - continue + assert isinstance(block_info, BlockInfo) exprs.append(block_info.final_register_states[phi.reg]) first_uw = early_unwrap(exprs[0]) @@ -3100,9 +3093,7 @@ def compute_has_custom_return(nodes: List[Node]) -> None: continue for p in n.parents: block_info2 = p.block.block_info - # assert isinstance(block_info2, BlockInfo) - if not block_info2: - continue + assert isinstance(block_info2, BlockInfo) if block_info2.has_custom_return: block_info.has_custom_return = True changed = True From 85e9d558dfab5931044bfaf47b1cc5ec31520721 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 16:01:25 -0700 Subject: [PATCH 45/54] Remove to_basic_node() --- src/flow_graph.py | 8 -------- src/loop_rerolling.py | 2 +- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index f26b056b..84fca29e 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -778,14 +778,6 @@ class BaseNode(abc.ABC): # i.e. there is an invariant `(node.loop is None) or (node.loop.head is node)` loop: Optional["NaturalLoop"] = attr.ib(init=False, default=None) - def to_basic_node(self, successor: "Node") -> "BasicNode": - new_node = BasicNode(self.block, self.emit_goto, successor) - new_node.parents = self.parents - new_node.dominators = self.dominators - new_node.immediate_dominator = self.immediate_dominator - new_node.immediately_dominates = self.immediately_dominates - return new_node - def name(self) -> str: return str(self.block.index) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index ebf8703d..279c083b 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -88,7 +88,7 @@ def modify_node_1_instructions(instructions: List[Instruction]) -> bool: if not modify_node_1_instructions(node_1.block.instructions): return False - new_node_1 = node_1.to_basic_node(successor=node_2) + new_node_1 = BasicNode(node_1.block, node_1.emit_goto, node_2) replace_node(flow_graph, node_1, new_node_1) remove_node(flow_graph, node_4, node_7) remove_node(flow_graph, node_5, node_7) From b595ce80b12ad6660ba46ca677ef97c7d04f88c3 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 16:02:24 -0700 Subject: [PATCH 46/54] Use zbanks's suggestion in SwitchNode --- src/flow_graph.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/flow_graph.py b/src/flow_graph.py index 84fca29e..12511271 100644 --- a/src/flow_graph.py +++ b/src/flow_graph.py @@ -876,13 +876,9 @@ class SwitchNode(BaseNode): cases: List["Node"] = attr.ib() def replace_any_children(self, replace_this: "Node", with_this: "Node") -> None: - new_cases: List["Node"] = [] - for case in self.cases: + for i, case in enumerate(self.cases): if case is replace_this: - new_cases.append(with_this) - else: - new_cases.append(case) - self.cases = new_cases + self.cases[i] = with_this def children(self) -> List["Node"]: # Deduplicate nodes in `self.cases` From ec1764539af8a53a9827c589769a86d17c6662c8 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 16:03:59 -0700 Subject: [PATCH 47/54] Move compute_relations() to 'if changed' block --- src/loop_rerolling.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index 279c083b..1d56a213 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -98,9 +98,6 @@ def modify_node_1_instructions(instructions: List[Instruction]) -> bool: def reroll_loops(flow_graph: FlowGraph) -> FlowGraph: - # TODO: What if knocking out nodes reveals another set of nodes - # that look identical? We will incorrectly be merging two - # adjacent for-loops. changed: bool = True while changed: changed = False @@ -109,6 +106,6 @@ def reroll_loops(flow_graph: FlowGraph) -> FlowGraph: continue changed = reroll_loop(flow_graph, node) if changed: + compute_relations(flow_graph.nodes) break - compute_relations(flow_graph.nodes) return flow_graph From ef59716f159ca16669da124124acaf1c6ab76108 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 16:12:46 -0700 Subject: [PATCH 48/54] Factor out the matching of nodes logic --- src/loop_rerolling.py | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index 1d56a213..cffb2279 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -1,4 +1,4 @@ -from typing import List +from typing import List, Optional, Tuple from .flow_graph import ( BasicNode, @@ -28,17 +28,17 @@ def replace_node(flow_graph: FlowGraph, replace_this: Node, with_this: Node) -> replace_node_references(flow_graph, replace_this, with_this) -def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: +def match_nodes(start: ConditionalNode) -> Optional[Tuple[Node, ...]]: node_1 = start.fallthrough_edge node_7 = start.conditional_edge if not isinstance(node_1, ConditionalNode): - return False + return None node_2 = node_1.fallthrough_edge node_5 = node_1.conditional_edge if not isinstance(node_2, BasicNode): - return False + return None node_3 = node_2.successor if not ( @@ -46,7 +46,7 @@ def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: and node_3.loop and node_3.conditional_edge is node_3 ): - return False + return None node_4 = node_3.fallthrough_edge if not ( @@ -54,10 +54,10 @@ def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: and node_4.fallthrough_edge is node_5 and node_4.conditional_edge is node_7 ): - return False + return None if not isinstance(node_5, BasicNode): - return False + return None node_6 = node_5.successor if not ( @@ -66,7 +66,15 @@ def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: and node_6.conditional_edge is node_6 and node_6.fallthrough_edge is node_7 ): + return None + return (node_1, node_2, node_3, node_4, node_5, node_6, node_7) + + +def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: + nodes = match_nodes(start) + if nodes is None: return False + (node_1, node_2, node_3, node_4, node_5, node_6, node_7) = nodes def modify_node_1_instructions(instructions: List[Instruction]) -> bool: # First, we check that the node has the instructions we From fc7fff94b25a127c717ca2057c8500fa79bd420d Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 16:52:08 -0700 Subject: [PATCH 49/54] Implement drop-in replacement for match_nodes --- src/loop_rerolling.py | 66 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 2 deletions(-) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index cffb2279..629ca304 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, Union from .flow_graph import ( BasicNode, @@ -70,8 +70,70 @@ def match_nodes(start: ConditionalNode) -> Optional[Tuple[Node, ...]]: return (node_1, node_2, node_3, node_4, node_5, node_6, node_7) +PatternGraph = Dict[int, Union[int, Tuple[int, int]]] + +IDO_O2_SIMPLE_LOOP: PatternGraph = { + 0: (1, 7), + 1: (2, 5), + 2: 3, + 3: (4, 3), + 4: (5, 7), + 5: 6, + 6: (7, 6), +} + + +def detect_pattern( + pattern: PatternGraph, flow_graph: FlowGraph, start: Node +) -> Optional[Tuple[Node, ...]]: + def idx_eq(node1: Node, node2: Node) -> bool: + return node1.block.index == node2.block.index + + indices = [node.block.index for node in flow_graph.nodes] + assert sorted(indices) == indices, "FlowGraphs should be sorted" + + offset = start.block.index + for label in pattern.keys(): + try: + node = flow_graph.nodes[label + offset] + target = pattern[label] + if isinstance(target, int): + if not isinstance(node, BasicNode) or not idx_eq( + node.successor, flow_graph.nodes[offset + target] + ): + return None + else: + (fallthrough, conditional) = target + if ( + not isinstance(node, ConditionalNode) + or not idx_eq( + node.conditional_edge, + flow_graph.nodes[offset + conditional], + ) + or not idx_eq( + node.fallthrough_edge, + flow_graph.nodes[offset + fallthrough], + ) + ): + return None + except IndexError: + return None + + all_nodes_in_pattern = ( + {offset + label for label in pattern.keys()} + | {offset + label[0] for label in pattern.values() if isinstance(label, tuple)} + | {offset + label[1] for label in pattern.values() if isinstance(label, tuple)} + ) + return tuple( + node + for i, node in enumerate(flow_graph.nodes) + if node is not start and i in all_nodes_in_pattern + ) + + def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: - nodes = match_nodes(start) + # nodes = match_nodes(start) + nodes = detect_pattern(IDO_O2_SIMPLE_LOOP, flow_graph, start) if nodes is None: return False (node_1, node_2, node_3, node_4, node_5, node_6, node_7) = nodes From d63b4afe5db2517b7c03a75cf2a641b0b2207b2b Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 16:52:28 -0700 Subject: [PATCH 50/54] Delete match_nodes() --- src/loop_rerolling.py | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index 629ca304..74ee4d5b 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -28,48 +28,6 @@ def replace_node(flow_graph: FlowGraph, replace_this: Node, with_this: Node) -> replace_node_references(flow_graph, replace_this, with_this) -def match_nodes(start: ConditionalNode) -> Optional[Tuple[Node, ...]]: - node_1 = start.fallthrough_edge - node_7 = start.conditional_edge - - if not isinstance(node_1, ConditionalNode): - return None - node_2 = node_1.fallthrough_edge - node_5 = node_1.conditional_edge - - if not isinstance(node_2, BasicNode): - return None - node_3 = node_2.successor - - if not ( - isinstance(node_3, ConditionalNode) - and node_3.loop - and node_3.conditional_edge is node_3 - ): - return None - node_4 = node_3.fallthrough_edge - - if not ( - isinstance(node_4, ConditionalNode) - and node_4.fallthrough_edge is node_5 - and node_4.conditional_edge is node_7 - ): - return None - - if not isinstance(node_5, BasicNode): - return None - node_6 = node_5.successor - - if not ( - isinstance(node_6, ConditionalNode) - and node_6.loop - and node_6.conditional_edge is node_6 - and node_6.fallthrough_edge is node_7 - ): - return None - return (node_1, node_2, node_3, node_4, node_5, node_6, node_7) - - PatternGraph = Dict[int, Union[int, Tuple[int, int]]] IDO_O2_SIMPLE_LOOP: PatternGraph = { @@ -132,7 +90,6 @@ def idx_eq(node1: Node, node2: Node) -> bool: def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: - # nodes = match_nodes(start) nodes = detect_pattern(IDO_O2_SIMPLE_LOOP, flow_graph, start) if nodes is None: return False From 0df65fca3398743e4d68e28f2bd61ac6afffc622 Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 16:55:16 -0700 Subject: [PATCH 51/54] Delete idx_eq() --- src/loop_rerolling.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index 74ee4d5b..af81a5bb 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -44,9 +44,6 @@ def replace_node(flow_graph: FlowGraph, replace_this: Node, with_this: Node) -> def detect_pattern( pattern: PatternGraph, flow_graph: FlowGraph, start: Node ) -> Optional[Tuple[Node, ...]]: - def idx_eq(node1: Node, node2: Node) -> bool: - return node1.block.index == node2.block.index - indices = [node.block.index for node in flow_graph.nodes] assert sorted(indices) == indices, "FlowGraphs should be sorted" @@ -56,22 +53,19 @@ def idx_eq(node1: Node, node2: Node) -> bool: node = flow_graph.nodes[label + offset] target = pattern[label] if isinstance(target, int): - if not isinstance(node, BasicNode) or not idx_eq( - node.successor, flow_graph.nodes[offset + target] + if ( + not isinstance(node, BasicNode) + or node.successor is not flow_graph.nodes[offset + target] ): return None else: (fallthrough, conditional) = target if ( not isinstance(node, ConditionalNode) - or not idx_eq( - node.conditional_edge, - flow_graph.nodes[offset + conditional], - ) - or not idx_eq( - node.fallthrough_edge, - flow_graph.nodes[offset + fallthrough], - ) + or node.conditional_edge + is not flow_graph.nodes[offset + conditional] + or node.fallthrough_edge + is not flow_graph.nodes[offset + fallthrough] ): return None except IndexError: From c89f8a07bbd66100e9f9e886caa28c998d218f8c Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 16:58:17 -0700 Subject: [PATCH 52/54] De Morgan's simplification --- src/loop_rerolling.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index af81a5bb..dfa45a3f 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -53,19 +53,17 @@ def detect_pattern( node = flow_graph.nodes[label + offset] target = pattern[label] if isinstance(target, int): - if ( - not isinstance(node, BasicNode) - or node.successor is not flow_graph.nodes[offset + target] + if not ( + isinstance(node, BasicNode) + and node.successor is flow_graph.nodes[offset + target] ): return None else: (fallthrough, conditional) = target - if ( - not isinstance(node, ConditionalNode) - or node.conditional_edge - is not flow_graph.nodes[offset + conditional] - or node.fallthrough_edge - is not flow_graph.nodes[offset + fallthrough] + if not ( + isinstance(node, ConditionalNode) + and node.conditional_edge is flow_graph.nodes[offset + conditional] + and node.fallthrough_edge is flow_graph.nodes[offset + fallthrough] ): return None except IndexError: From f9b25ed9ebde19c11e23210642c5e1629d5c101b Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 17:14:09 -0700 Subject: [PATCH 53/54] Introduce remove_and_replace_nodes() --- src/loop_rerolling.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index dfa45a3f..74770072 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -81,6 +81,15 @@ def detect_pattern( ) +def remove_and_replace_nodes(flow_graph: FlowGraph, nodes: Tuple[Node, ...]) -> None: + (node_1, node_2, node_3, node_4, node_5, node_6, node_7) = nodes + new_node_1 = BasicNode(node_1.block, node_1.emit_goto, node_2) + replace_node(flow_graph, node_1, new_node_1) + remove_node(flow_graph, node_4, node_7) + remove_node(flow_graph, node_5, node_7) + remove_node(flow_graph, node_6, node_7) # TODO: assert didn't execute anything?. + + def reroll_loop(flow_graph: FlowGraph, start: ConditionalNode) -> bool: nodes = detect_pattern(IDO_O2_SIMPLE_LOOP, flow_graph, start) if nodes is None: @@ -107,11 +116,7 @@ def modify_node_1_instructions(instructions: List[Instruction]) -> bool: if not modify_node_1_instructions(node_1.block.instructions): return False - new_node_1 = BasicNode(node_1.block, node_1.emit_goto, node_2) - replace_node(flow_graph, node_1, new_node_1) - remove_node(flow_graph, node_4, node_7) - remove_node(flow_graph, node_5, node_7) - remove_node(flow_graph, node_6, node_7) # TODO: assert didn't execute anything?. + remove_and_replace_nodes(flow_graph, nodes) return True From 587e770ae6c93a122f6dbf3ce7102690a90dd32c Mon Sep 17 00:00:00 2001 From: Matt Kempster Date: Wed, 30 Jun 2021 17:16:16 -0700 Subject: [PATCH 54/54] Leave a note to myself --- src/loop_rerolling.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/loop_rerolling.py b/src/loop_rerolling.py index 74770072..36fddec2 100644 --- a/src/loop_rerolling.py +++ b/src/loop_rerolling.py @@ -69,6 +69,9 @@ def detect_pattern( except IndexError: return None + # TODO: Check that the subgraph is self-contained except for the entry + # and exit nodes, which themselves should be somehow "designated" using an enum. + all_nodes_in_pattern = ( {offset + label for label in pattern.keys()} | {offset + label[0] for label in pattern.values() if isinstance(label, tuple)}