-
Notifications
You must be signed in to change notification settings - Fork 14
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[AIE2] Add SWP tests with SR WAW dependencies
As we start teaching the SW pipeliner about status registers, these tests will serve as a baseline to visualize improvements.
- Loading branch information
Showing
1 changed file
with
212 additions
and
0 deletions.
There are no files selected for viewing
212 changes: 212 additions & 0 deletions
212
llvm/test/CodeGen/AIE/aie2/schedule/swp/swp-srflags.mir
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,212 @@ | ||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 | ||
# | ||
# This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# (c) Copyright 2024 Advanced Micro Devices, Inc. or its affiliates | ||
# RUN: llc --mtriple=aie2 --run-pass=pipeliner --aie-loop-min-tripcount=2 --pipeliner-max-stages=1 %s -o - | FileCheck %s | ||
|
||
# These tests verify how instructions writing the same status register (here | ||
# srfpflags) get SW pipelined. Without care, the instructions get chained together | ||
# due to WAW dependencies, making SWP impossible/inefficient. | ||
|
||
# accum_2_chains: the input loop (bb.3) carries two accumulators through PHIs. | ||
# Each accumulator is updated by a chain of two VADD_F, the two chain results | ||
# are added by a fifth VADD_F, and both sub-registers of that sum are stored. | ||
# Every VADD_F carries an implicit-def of $srfpflags, so all five instructions | ||
# write the same status register. | ||
# | ||
# The CHECK lines record the current pipeliner output: bb.3 holds one copy of | ||
# the five VADD_F plus the counter decrement, the loop bb.4 stores the sum | ||
# carried in by a PHI while computing the next sum, and bb.5 stores the sum | ||
# produced by the last pass through bb.4. | ||
# | ||
#  VADD   VADD | ||
#    |      | | ||
#  VADD   VADD | ||
#     \    / | ||
#      VADD | ||
#      /  \ | ||
#   VST    VST | ||
--- | ||
name: accum_2_chains | ||
alignment: 16 | ||
tracksRegLiveness: true | ||
debugInstrRef: false | ||
liveins: [] | ||
body: | | ||
; CHECK-LABEL: name: accum_2_chains | ||
; CHECK: bb.0: | ||
; CHECK-NEXT: successors: %bb.3(0x80000000) | ||
; CHECK-NEXT: liveins: $p0, $r0, $x0, $x2, $x4, $x6 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 | ||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY $x0 | ||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY $x2 | ||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY $x4 | ||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY $x6 | ||
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 1 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.3: | ||
; CHECK-NEXT: successors: %bb.4(0x80000000) | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[VADD_F:%[0-9]+]]:acc512 = VADD_F [[COPY4]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F1:%[0-9]+]]:acc512 = VADD_F [[VADD_F]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F2:%[0-9]+]]:acc512 = VADD_F [[COPY5]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F3:%[0-9]+]]:acc512 = VADD_F [[VADD_F2]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F4:%[0-9]+]]:acc512 = VADD_F [[VADD_F1]], [[VADD_F3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:er = nsw ADD_add_r_ri [[COPY1]], -1, implicit-def $srcarry | ||
; CHECK-NEXT: PseudoJ_jump_imm %bb.4 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.4: | ||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[PHI:%[0-9]+]]:er = PHI [[ADD_add_r_ri]], %bb.3, %23, %bb.4 | ||
; CHECK-NEXT: [[PHI1:%[0-9]+]]:acc512 = PHI [[VADD_F1]], %bb.3, %24, %bb.4 | ||
; CHECK-NEXT: [[PHI2:%[0-9]+]]:acc512 = PHI [[VADD_F3]], %bb.3, %26, %bb.4 | ||
; CHECK-NEXT: [[PHI3:%[0-9]+]]:acc512 = PHI [[VADD_F4]], %bb.3, %27, %bb.4 | ||
; CHECK-NEXT: [[VADD_F5:%[0-9]+]]:acc512 = VADD_F [[PHI1]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[ADD_add_r_ri1:%[0-9]+]]:er = nsw ADD_add_r_ri [[PHI]], -1, implicit-def $srcarry | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI3]].sub_256_lo, [[COPY]], 0 | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI3]].sub_256_hi, [[COPY]], 0 | ||
; CHECK-NEXT: [[VADD_F6:%[0-9]+]]:acc512 = VADD_F [[VADD_F5]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F7:%[0-9]+]]:acc512 = VADD_F [[PHI2]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F8:%[0-9]+]]:acc512 = VADD_F [[VADD_F7]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F9:%[0-9]+]]:acc512 = VADD_F [[VADD_F6]], [[VADD_F8]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: PseudoJNZ [[ADD_add_r_ri1]], %bb.4 | ||
; CHECK-NEXT: PseudoJ_jump_imm %bb.5 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.5: | ||
; CHECK-NEXT: successors: %bb.2(0x80000000) | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[PHI4:%[0-9]+]]:acc512 = PHI [[VADD_F9]], %bb.4 | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI4]].sub_256_lo, [[COPY]], 0 | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI4]].sub_256_hi, [[COPY]], 0 | ||
; CHECK-NEXT: PseudoJ_jump_imm %bb.2 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.2: | ||
; CHECK-NEXT: PseudoRET implicit $lr | ||
; Input MIR: bb.1 copies the live-in registers into virtual registers, bb.3 is | ||
; the single-block loop handed to the pipeliner, and bb.2 returns. | ||
bb.1: | ||
liveins: $p0, $r0, $x0, $x2, $x4, $x6 | ||
%1:ep = COPY $p0 | ||
%19:er = COPY $r0 | ||
%100:acc512 = COPY $x0 | ||
%101:acc512 = COPY $x2 | ||
%10:acc512 = COPY $x4 | ||
%20:acc512 = COPY $x6 | ||
%4:er = MOV_RLC_imm10_pseudo 1 | ||
bb.3: | ||
; Loop-carried values: %3 is the counter, %11 and %21 are the two accumulators. | ||
%3:er = PHI %19, %bb.1, %16, %bb.3 | ||
%11:acc512 = PHI %10, %bb.1, %13, %bb.3 | ||
%21:acc512 = PHI %20, %bb.1, %23, %bb.3 | ||
; Chain 1: %11 -> %12 -> %13 (%13 feeds the %11 PHI on the back-edge). | ||
%12:acc512 = VADD_F %11, %100, %4,implicit-def $srfpflags, implicit $crfpmask | ||
%13:acc512 = VADD_F %12, %101, %4,implicit-def $srfpflags, implicit $crfpmask | ||
; Chain 2: %21 -> %22 -> %23 (%23 feeds the %21 PHI on the back-edge). | ||
%22:acc512 = VADD_F %21, %100, %4,implicit-def $srfpflags, implicit $crfpmask | ||
%23:acc512 = VADD_F %22, %101, %4,implicit-def $srfpflags, implicit $crfpmask | ||
; Join of both chains; its sub_256_lo and sub_256_hi sub-registers are stored | ||
; to the same address operands (%1, 0). | ||
%30:acc512 = VADD_F %13, %23, %4,implicit-def $srfpflags, implicit $crfpmask | ||
VST_dmw_sts_am_ag_idx_imm %30.sub_256_lo, %1, 0 | ||
VST_dmw_sts_am_ag_idx_imm %30.sub_256_hi, %1, 0 | ||
; Counter update: add -1 to %3, then PseudoJNZ branches back to bb.3 based on | ||
; %16 (presumably while it is non-zero). | ||
%16:er = nsw ADD_add_r_ri %3, -1, implicit-def $srcarry | ||
PseudoJNZ %16, %bb.3 | ||
bb.2: | ||
PseudoRET implicit $lr | ||
... | ||
|
||
# parallel_add_st: the input loop (bb.3) carries two accumulators through PHIs. | ||
# Each accumulator is updated by its own chain of three VADD_F, and each chain | ||
# result is stored as two sub-registers (sub_256_lo, sub_256_hi). The chains | ||
# share no data: they only have in common that every VADD_F carries an | ||
# implicit-def of $srfpflags. | ||
# | ||
# The CHECK lines record the current pipeliner output: bb.3 holds one copy of | ||
# the six VADD_F and the stores of the first chain; in the loop bb.4 the first | ||
# chain's stores use the value computed in the same pass (VADD_F8) while the | ||
# second chain's stores use a value carried in by a PHI (PHI3); bb.5 stores the | ||
# second chain's final value. | ||
# | ||
#  VADD    VADD | ||
#    |       | | ||
#  VADD    VADD | ||
#    |       | | ||
#  VADD    VADD | ||
#    |       | | ||
# 2xVST   2xVST | ||
--- | ||
name: parallel_add_st | ||
alignment: 16 | ||
tracksRegLiveness: true | ||
debugInstrRef: false | ||
liveins: [] | ||
body: | | ||
; CHECK-LABEL: name: parallel_add_st | ||
; CHECK: bb.0: | ||
; CHECK-NEXT: successors: %bb.3(0x80000000) | ||
; CHECK-NEXT: liveins: $p0, $r0, $x0, $x2, $x4, $x6 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[COPY:%[0-9]+]]:ep = COPY $p0 | ||
; CHECK-NEXT: [[COPY1:%[0-9]+]]:er = COPY $r0 | ||
; CHECK-NEXT: [[COPY2:%[0-9]+]]:acc512 = COPY $x0 | ||
; CHECK-NEXT: [[COPY3:%[0-9]+]]:acc512 = COPY $x2 | ||
; CHECK-NEXT: [[COPY4:%[0-9]+]]:acc512 = COPY $x2 | ||
; CHECK-NEXT: [[COPY5:%[0-9]+]]:acc512 = COPY $x4 | ||
; CHECK-NEXT: [[COPY6:%[0-9]+]]:acc512 = COPY $x6 | ||
; CHECK-NEXT: [[MOV_RLC_imm10_pseudo:%[0-9]+]]:er = MOV_RLC_imm10_pseudo 1 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.3: | ||
; CHECK-NEXT: successors: %bb.4(0x80000000) | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[VADD_F:%[0-9]+]]:acc512 = VADD_F [[COPY5]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F1:%[0-9]+]]:acc512 = VADD_F [[VADD_F]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F2:%[0-9]+]]:acc512 = VADD_F [[VADD_F1]], [[COPY4]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F3:%[0-9]+]]:acc512 = VADD_F [[COPY6]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F4:%[0-9]+]]:acc512 = VADD_F [[VADD_F3]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F5:%[0-9]+]]:acc512 = VADD_F [[VADD_F4]], [[COPY4]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[VADD_F2]].sub_256_lo, [[COPY]], 0 | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[VADD_F2]].sub_256_hi, [[COPY]], 0 | ||
; CHECK-NEXT: [[ADD_add_r_ri:%[0-9]+]]:er = nsw ADD_add_r_ri [[COPY1]], -1, implicit-def $srcarry | ||
; CHECK-NEXT: PseudoJ_jump_imm %bb.4 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.4: | ||
; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.5(0x40000000) | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[PHI:%[0-9]+]]:er = PHI [[ADD_add_r_ri]], %bb.3, %26, %bb.4 | ||
; CHECK-NEXT: [[PHI1:%[0-9]+]]:acc512 = PHI [[VADD_F2]], %bb.3, %28, %bb.4 | ||
; CHECK-NEXT: [[PHI2:%[0-9]+]]:acc512 = PHI [[VADD_F5]], %bb.3, %31, %bb.4 | ||
; CHECK-NEXT: [[PHI3:%[0-9]+]]:acc512 = PHI [[VADD_F5]], %bb.3, %31, %bb.4 | ||
; CHECK-NEXT: [[VADD_F6:%[0-9]+]]:acc512 = VADD_F [[PHI1]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[ADD_add_r_ri1:%[0-9]+]]:er = nsw ADD_add_r_ri [[PHI]], -1, implicit-def $srcarry | ||
; CHECK-NEXT: [[VADD_F7:%[0-9]+]]:acc512 = VADD_F [[VADD_F6]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI3]].sub_256_lo, [[COPY]], 0 | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI3]].sub_256_hi, [[COPY]], 0 | ||
; CHECK-NEXT: [[VADD_F8:%[0-9]+]]:acc512 = VADD_F [[VADD_F7]], [[COPY4]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F9:%[0-9]+]]:acc512 = VADD_F [[PHI2]], [[COPY2]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: [[VADD_F10:%[0-9]+]]:acc512 = VADD_F [[VADD_F9]], [[COPY3]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[VADD_F8]].sub_256_lo, [[COPY]], 0 | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[VADD_F8]].sub_256_hi, [[COPY]], 0 | ||
; CHECK-NEXT: [[VADD_F11:%[0-9]+]]:acc512 = VADD_F [[VADD_F10]], [[COPY4]], [[MOV_RLC_imm10_pseudo]], implicit-def $srfpflags, implicit $crfpmask | ||
; CHECK-NEXT: PseudoJNZ [[ADD_add_r_ri1]], %bb.4 | ||
; CHECK-NEXT: PseudoJ_jump_imm %bb.5 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.5: | ||
; CHECK-NEXT: successors: %bb.2(0x80000000) | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: [[PHI4:%[0-9]+]]:acc512 = PHI [[VADD_F11]], %bb.4 | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI4]].sub_256_lo, [[COPY]], 0 | ||
; CHECK-NEXT: VST_dmw_sts_am_ag_idx_imm [[PHI4]].sub_256_hi, [[COPY]], 0 | ||
; CHECK-NEXT: PseudoJ_jump_imm %bb.2 | ||
; CHECK-NEXT: {{ $}} | ||
; CHECK-NEXT: bb.2: | ||
; CHECK-NEXT: PseudoRET implicit $lr | ||
; Input MIR: bb.1 copies the live-in registers into virtual registers, bb.3 is | ||
; the single-block loop handed to the pipeliner, and bb.2 returns. | ||
bb.1: | ||
liveins: $p0, $r0, $x0, $x2, $x4, $x6 | ||
%1:ep = COPY $p0 | ||
%19:er = COPY $r0 | ||
%100:acc512 = COPY $x0 | ||
; NOTE(review): %101 and %102 are both copies of $x2 - confirm this is | ||
; intentional and not a typo for a different register. | ||
%101:acc512 = COPY $x2 | ||
%102:acc512 = COPY $x2 | ||
%10:acc512 = COPY $x4 | ||
%20:acc512 = COPY $x6 | ||
%4:er = MOV_RLC_imm10_pseudo 1 | ||
bb.3: | ||
; Loop-carried values: %3 is the counter, %11 and %21 are the two accumulators. | ||
%3:er = PHI %19, %bb.1, %16, %bb.3 | ||
%11:acc512 = PHI %10, %bb.1, %14, %bb.3 | ||
%21:acc512 = PHI %20, %bb.1, %24, %bb.3 | ||
; Chain 1: %11 -> %12 -> %13 -> %14 (%14 feeds the %11 PHI on the back-edge). | ||
%12:acc512 = VADD_F %11, %100, %4,implicit-def $srfpflags, implicit $crfpmask | ||
%13:acc512 = VADD_F %12, %101, %4,implicit-def $srfpflags, implicit $crfpmask | ||
%14:acc512 = VADD_F %13, %102, %4,implicit-def $srfpflags, implicit $crfpmask | ||
; Chain 2: %21 -> %22 -> %23 -> %24 (%24 feeds the %21 PHI on the back-edge). | ||
%22:acc512 = VADD_F %21, %100, %4,implicit-def $srfpflags, implicit $crfpmask | ||
%23:acc512 = VADD_F %22, %101, %4,implicit-def $srfpflags, implicit $crfpmask | ||
%24:acc512 = VADD_F %23, %102, %4,implicit-def $srfpflags, implicit $crfpmask | ||
; Each chain result is stored as two sub-registers; all four stores use the | ||
; same address operands (%1, 0). | ||
VST_dmw_sts_am_ag_idx_imm %14.sub_256_lo, %1, 0 | ||
VST_dmw_sts_am_ag_idx_imm %14.sub_256_hi, %1, 0 | ||
VST_dmw_sts_am_ag_idx_imm %24.sub_256_lo, %1, 0 | ||
VST_dmw_sts_am_ag_idx_imm %24.sub_256_hi, %1, 0 | ||
; Counter update: add -1 to %3, then PseudoJNZ branches back to bb.3 based on | ||
; %16 (presumably while it is non-zero). | ||
%16:er = nsw ADD_add_r_ri %3, -1, implicit-def $srcarry | ||
PseudoJNZ %16, %bb.3 | ||
bb.2: | ||
PseudoRET implicit $lr | ||
... |