Skip to content

Commit

Permalink
[AIE2] Add SWP tests with SR WAW dependencies
Browse files Browse the repository at this point in the history
As we start teaching the SW pipeliner about status registers, these tests
will serve as a baseline to visualize improvements.
  • Loading branch information
gbossu committed May 3, 2024
1 parent e3c03b3 commit 6ec14a3
Showing 1 changed file with 212 additions and 0 deletions.
212 changes: 212 additions & 0 deletions llvm/test/CodeGen/AIE/aie2/schedule/swp/swp-srflags.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
#
# This file is licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates
# RUN: llc --mtriple=aie2 --run-pass=pipeliner --aie-loop-min-tripcount=2 --pipeliner-max-stages=1 %s -o - | FileCheck %s

# These tests verify how instructions writing the same status register (here
# srfpflags) get SW pipelined. Without care, the instructions get chained together
# due to WAW dependencies, making SWP impossible/inefficient.

#   VADD   VADD
#    |      |
#   VADD   VADD
#      \   /
#       VADD
#      /   \
#    VST   VST
---
# Two loop-carried VADD_F chains whose results are merged by a fifth VADD_F
# and then stored. Every VADD_F carries `implicit-def $srfpflags`.
name: accum_2_chains
alignment: 16
tracksRegLiveness: true
debugInstrRef: false
liveins: []
body: |
; CHECK-LABEL: name: accum_2_chains
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: liveins: $p0, $r0, $x0, $x2, $x4, $x6
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY $x0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY $x2
; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY $x4
; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY $x6
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[VADD_F:%[0-9]+]]:acc512 = VADD_F [[COPY4]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F1:%[0-9]+]]:acc512 = VADD_F [[VADD_F]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F2:%[0-9]+]]:acc512 = VADD_F [[COPY5]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F3:%[0-9]+]]:acc512 = VADD_F [[VADD_F2]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F4:%[0-9]+]]:acc512 = VADD_F [[VADD_F1]], [[VADD_F3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:er = nsw ADD_add_r_ri [[COPY1]], -1, implicit-def $srcarry
; CHECK-NEXT: PseudoJ_jump_imm %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:er = PHI [[ADD_add_r_ri]], %bb.3, %23, %bb.4
; CHECK-NEXT: [[PHI1:%[0-9]+]]:acc512 = PHI [[VADD_F1]], %bb.3, %24, %bb.4
; CHECK-NEXT: [[PHI2:%[0-9]+]]:acc512 = PHI [[VADD_F3]], %bb.3, %26, %bb.4
; CHECK-NEXT: [[PHI3:%[0-9]+]]:acc512 = PHI [[VADD_F4]], %bb.3, %27, %bb.4
; CHECK-NEXT: [[VADD_F5:%[0-9]+]]:acc512 = VADD_F [[PHI1]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[ADD_add_r_ri1:%[0-9]+]]:er = nsw ADD_add_r_ri [[PHI]], -1, implicit-def $srcarry
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI3]].sub_256_lo, [[COPY]], 0
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI3]].sub_256_hi, [[COPY]], 0
; CHECK-NEXT: [[VADD_F6:%[0-9]+]]:acc512 = VADD_F [[VADD_F5]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F7:%[0-9]+]]:acc512 = VADD_F [[PHI2]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F8:%[0-9]+]]:acc512 = VADD_F [[VADD_F7]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F9:%[0-9]+]]:acc512 = VADD_F [[VADD_F6]], [[VADD_F8]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: PseudoJNZ [[ADD_add_r_ri1]], %bb.4
; CHECK-NEXT: PseudoJ_jump_imm %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI4:%[0-9]+]]:acc512 = PHI [[VADD_F9]], %bb.4
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI4]].sub_256_lo, [[COPY]], 0
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI4]].sub_256_hi, [[COPY]], 0
; CHECK-NEXT: PseudoJ_jump_imm %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: PseudoRET implicit $lr
;
; Input MIR starts here. bb.1 is the entry block: it copies the live-in
; physical registers into virtual registers and materializes the immediate 1
; in %4, which is passed as the third operand of every VADD_F below.
bb.1:
liveins: $p0, $r0, $x0, $x2, $x4, $x6
%1:ep = COPY $p0
%19:er = COPY $r0
%100:acc512 = COPY $x0
%101:acc512 = COPY $x2
%10:acc512 = COPY $x4
%20:acc512 = COPY $x6
%4:er = MOV_RLC_imm10_pseudo 1
; bb.3 is a single-block loop: its PHIs take values from bb.1 and from bb.3
; itself, and it ends with a PseudoJNZ targeting bb.3.
bb.3:
; %3 is the loop counter (seeded from %19, updated through %16).
; %11 and %21 are the two loop-carried accumulators, fed back from %13 and %23.
%3:er = PHI %19, %bb.1, %16, %bb.3
%11:acc512 = PHI %10, %bb.1, %13, %bb.3
%21:acc512 = PHI %20, %bb.1, %23, %bb.3
; First chain: %11 -> %12 -> %13.
%12:acc512 = VADD_F %11, %100, %4,implicit-def $srfpflags, implicit $crfpmask
%13:acc512 = VADD_F %12, %101, %4,implicit-def $srfpflags, implicit $crfpmask
; Second chain: %21 -> %22 -> %23. It has no data dependency on the first
; chain; the only link between them is that both define $srfpflags.
%22:acc512 = VADD_F %21, %100, %4,implicit-def $srfpflags, implicit $crfpmask
%23:acc512 = VADD_F %22, %101, %4,implicit-def $srfpflags, implicit $crfpmask
; Merge the two chain results.
%30:acc512 = VADD_F %13, %23, %4,implicit-def $srfpflags, implicit $crfpmask
; Store the sub_256_lo and sub_256_hi subregisters of the merged value through
; pointer %1. Both stores use the same immediate 0.
VST_dmw_sts_am_ag_idx_imm %30.sub_256_lo, %1, 0
VST_dmw_sts_am_ag_idx_imm %30.sub_256_hi, %1, 0
; Decrement the counter and branch back to bb.3 (presumably while %16 is
; non-zero, going by the PseudoJNZ name); otherwise fall through to bb.2.
%16:er = nsw ADD_add_r_ri %3, -1, implicit-def $srcarry
PseudoJNZ %16, %bb.3
bb.2:
PseudoRET implicit $lr
...

#  VADD    VADD
#   |       |
#  VADD    VADD
#   |       |
#  VADD    VADD
#   |       |
# 2xVST   2xVST
---
# Two independent loop-carried chains of three VADD_F each; each chain's
# result is stored with its own pair of VST instructions. Every VADD_F carries
# `implicit-def $srfpflags`.
name: parallel_add_st
alignment: 16
tracksRegLiveness: true
debugInstrRef: false
liveins: []
body: |
; CHECK-LABEL: name: parallel_add_st
; CHECK: bb.0:
; CHECK-NEXT: successors: %bb.3(0x80000000)
; CHECK-NEXT: liveins: $p0, $r0, $x0, $x2, $x4, $x6
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY $x0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY $x2
; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY $x2
; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY $x4
; CHECK-NEXT: [[COPY6:%[0-9]+]]:acc512 = COPY $x6
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.3:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[VADD_F:%[0-9]+]]:acc512 = VADD_F [[COPY5]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F1:%[0-9]+]]:acc512 = VADD_F [[VADD_F]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F2:%[0-9]+]]:acc512 = VADD_F [[VADD_F1]], [[COPY4]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F3:%[0-9]+]]:acc512 = VADD_F [[COPY6]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F4:%[0-9]+]]:acc512 = VADD_F [[VADD_F3]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F5:%[0-9]+]]:acc512 = VADD_F [[VADD_F4]], [[COPY4]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[VADD_F2]].sub_256_lo, [[COPY]], 0
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[VADD_F2]].sub_256_hi, [[COPY]], 0
; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:er = nsw ADD_add_r_ri [[COPY1]], -1, implicit-def $srcarry
; CHECK-NEXT: PseudoJ_jump_imm %bb.4
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4:
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI:%[0-9]+]]:er = PHI [[ADD_add_r_ri]], %bb.3, %26, %bb.4
; CHECK-NEXT: [[PHI1:%[0-9]+]]:acc512 = PHI [[VADD_F2]], %bb.3, %28, %bb.4
; CHECK-NEXT: [[PHI2:%[0-9]+]]:acc512 = PHI [[VADD_F5]], %bb.3, %31, %bb.4
; CHECK-NEXT: [[PHI3:%[0-9]+]]:acc512 = PHI [[VADD_F5]], %bb.3, %31, %bb.4
; CHECK-NEXT: [[VADD_F6:%[0-9]+]]:acc512 = VADD_F [[PHI1]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[ADD_add_r_ri1:%[0-9]+]]:er = nsw ADD_add_r_ri [[PHI]], -1, implicit-def $srcarry
; CHECK-NEXT: [[VADD_F7:%[0-9]+]]:acc512 = VADD_F [[VADD_F6]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI3]].sub_256_lo, [[COPY]], 0
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI3]].sub_256_hi, [[COPY]], 0
; CHECK-NEXT: [[VADD_F8:%[0-9]+]]:acc512 = VADD_F [[VADD_F7]], [[COPY4]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F9:%[0-9]+]]:acc512 = VADD_F [[PHI2]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: [[VADD_F10:%[0-9]+]]:acc512 = VADD_F [[VADD_F9]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[VADD_F8]].sub_256_lo, [[COPY]], 0
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[VADD_F8]].sub_256_hi, [[COPY]], 0
; CHECK-NEXT: [[VADD_F11:%[0-9]+]]:acc512 = VADD_F [[VADD_F10]], [[COPY4]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask
; CHECK-NEXT: PseudoJNZ [[ADD_add_r_ri1]], %bb.4
; CHECK-NEXT: PseudoJ_jump_imm %bb.5
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.5:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI4:%[0-9]+]]:acc512 = PHI [[VADD_F11]], %bb.4
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI4]].sub_256_lo, [[COPY]], 0
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI4]].sub_256_hi, [[COPY]], 0
; CHECK-NEXT: PseudoJ_jump_imm %bb.2
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.2:
; CHECK-NEXT: PseudoRET implicit $lr
;
; Input MIR starts here. bb.1 is the entry block: it copies the live-in
; physical registers into virtual registers and materializes the immediate 1
; in %4, which is passed as the third operand of every VADD_F below.
bb.1:
liveins: $p0, $r0, $x0, $x2, $x4, $x6
%1:ep = COPY $p0
%19:er = COPY $r0
%100:acc512 = COPY $x0
%101:acc512 = COPY $x2
; NOTE(review): %102 copies $x2, the same source as %101 above. Confirm this
; duplication is intentional and not a copy-paste slip.
%102:acc512 = COPY $x2
%10:acc512 = COPY $x4
%20:acc512 = COPY $x6
%4:er = MOV_RLC_imm10_pseudo 1
; bb.3 is a single-block loop: its PHIs take values from bb.1 and from bb.3
; itself, and it ends with a PseudoJNZ targeting bb.3.
bb.3:
; %3 is the loop counter (seeded from %19, updated through %16).
; %11 and %21 are the two loop-carried accumulators, fed back from %14 and %24.
%3:er = PHI %19, %bb.1, %16, %bb.3
%11:acc512 = PHI %10, %bb.1, %14, %bb.3
%21:acc512 = PHI %20, %bb.1, %24, %bb.3
; First chain: %11 -> %12 -> %13 -> %14.
%12:acc512 = VADD_F %11, %100, %4,implicit-def $srfpflags, implicit $crfpmask
%13:acc512 = VADD_F %12, %101, %4,implicit-def $srfpflags, implicit $crfpmask
%14:acc512 = VADD_F %13, %102, %4,implicit-def $srfpflags, implicit $crfpmask
; Second chain: %21 -> %22 -> %23 -> %24. It has no data dependency on the
; first chain; the only link between them is that both define $srfpflags.
%22:acc512 = VADD_F %21, %100, %4,implicit-def $srfpflags, implicit $crfpmask
%23:acc512 = VADD_F %22, %101, %4,implicit-def $srfpflags, implicit $crfpmask
%24:acc512 = VADD_F %23, %102, %4,implicit-def $srfpflags, implicit $crfpmask
; Store the sub_256_lo and sub_256_hi subregisters of each chain result
; through pointer %1. All four stores use the same pointer and immediate 0.
VST_dmw_sts_am_ag_idx_imm %14.sub_256_lo, %1, 0
VST_dmw_sts_am_ag_idx_imm %14.sub_256_hi, %1, 0
VST_dmw_sts_am_ag_idx_imm %24.sub_256_lo, %1, 0
VST_dmw_sts_am_ag_idx_imm %24.sub_256_hi, %1, 0
; Decrement the counter and branch back to bb.3 (presumably while %16 is
; non-zero, going by the PseudoJNZ name); otherwise fall through to bb.2.
%16:er = nsw ADD_add_r_ri %3, -1, implicit-def $srcarry
PseudoJNZ %16, %bb.3
bb.2:
PseudoRET implicit $lr
...

0 comments on commit 6ec14a3

Please sign in to comment.