-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
RFC: Reorganize AIELegalizerInfo #159
Changes from all commits
212adc6
5f684ee
5638f64
0ff6c68
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,299 @@ | ||
//===- AIE1LegalizerInfo.cpp ----------------------------------------------===// | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates | ||
// | ||
//===----------------------------------------------------------------------===// | ||
/// \file | ||
/// This file implements the targeting of the MachineLegalizer class for AIE1. | ||
/// \todo This should be generated by TableGen. | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "AIE1LegalizerInfo.h" | ||
#include "AIEBaseISelLowering.h" | ||
#include "AIEBaseSubtarget.h" | ||
#include "AIELegalizerHelper.h" | ||
#include "llvm/Analysis/VectorUtils.h" | ||
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" | ||
#include "llvm/CodeGen/MachineOperand.h" | ||
#include "llvm/CodeGen/TargetOpcodes.h" | ||
#include "llvm/CodeGenTypes/LowLevelType.h" | ||
#include <initializer_list> | ||
|
||
using namespace llvm; | ||
|
||
/// Registers the GlobalISel legalization rules for AIE1. | ||
/// | ||
/// Declares the LLT shorthands used below, attaches an action chain to each | ||
/// generic opcode, then calls computeTables() on the legacy legalizer info and | ||
/// verify() with the instruction info obtained from \p ST. | ||
AIE1LegalizerInfo::AIE1LegalizerInfo(const AIEBaseSubtarget &ST) | ||
: AIEHelper(ST) { | ||
using namespace TargetOpcode; | ||
// Scalar shorthands, plus P0: a 20-bit pointer in address space 0. | ||
const LLT S8 = LLT::scalar(8); | ||
const LLT S16 = LLT::scalar(16); | ||
const LLT S20 = LLT::scalar(20); | ||
const LLT S32 = LLT::scalar(32); | ||
const LLT S64 = LLT::scalar(64); | ||
const LLT P0 = LLT::pointer(0, 20); | ||
|
||
// 128-bit vectors | ||
const LLT V16S8 = LLT::fixed_vector(16, 8); | ||
const LLT V8S16 = LLT::fixed_vector(8, 16); | ||
const LLT V4S32 = LLT::fixed_vector(4, 32); | ||
|
||
// 256-bit vectors | ||
const LLT V8S32 = LLT::fixed_vector(8, 32); | ||
const LLT V16S16 = LLT::fixed_vector(16, 16); | ||
const LLT V32S8 = LLT::fixed_vector(32, 8); | ||
|
||
// 256-bit accumulators | ||
const LLT ACC256 = LLT::fixed_vector(4, 64); | ||
|
||
// 512-bit vectors | ||
const LLT V16S32 = LLT::fixed_vector(16, 32); | ||
const LLT V32S16 = LLT::fixed_vector(32, 16); | ||
const LLT V64S8 = LLT::fixed_vector(64, 8); | ||
|
||
// 512-bit accumulators | ||
const LLT ACC512 = LLT::fixed_vector(8, 64); | ||
|
||
// 1024-bit vectors | ||
const LLT V32S32 = LLT::fixed_vector(32, 32); | ||
const LLT V64S16 = LLT::fixed_vector(64, 16); | ||
const LLT V128S8 = LLT::fixed_vector(128, 8); | ||
|
||
// 1024-bit accumulators | ||
const LLT ACC1024 = LLT::fixed_vector(16, 64); | ||
|
||
const LLT S128 = LLT::scalar(128); | ||
|
||
// S20, S32, P0 and S128 are legal as they are; the remaining rules widen | ||
// type index 0 to a power of two and clamp it to S32. | ||
auto &IMPLICIT = getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) | ||
.legalFor({S20, S32, P0, S128}); | ||
|
||
IMPLICIT.widenScalarToNextPow2(0).clampScalar(0, S32, S32); | ||
|
||
getActionDefinitionsBuilder(G_CONSTANT) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. These is a decent overlap of these definitions, which is a shame since it adds duplication. Maybe we can reuse the trick you used for the opcodes and define a base class with Disclaimer: didn't test this yet There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I did consider this, e.g. through the common CRTP pattern. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I can see that. It is very much a DRY-code complexity trade off. My worry is mainly in having to deal with the duplication if the number of variants becomes very large. Then changes to the legalizer might require changes to 10 different files which need to be kept into sync. But that is a worry for the long term and something that can be dealt with then. |
||
.legalFor({S20, S32, P0}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S32); | ||
|
||
// FIXME: AIE1 actually supports float | ||
// S16, S32 and S64 constants take the custom path (see legalizeCustom()). | ||
getActionDefinitionsBuilder(G_FCONSTANT).customFor({S16, S32, S64}); | ||
|
||
getActionDefinitionsBuilder(G_ICMP) | ||
.legalFor({{S32, S32}, {S32, P0}}) | ||
.clampScalar(0, S32, S32) | ||
.clampScalar(1, S32, S32); | ||
|
||
// Floating-point arithmetic on S32 and S64 is turned into library calls. | ||
getActionDefinitionsBuilder({G_FMUL, G_FDIV, G_FADD, G_FSUB, G_FREM}) | ||
.libcallFor({S32, S64}); | ||
|
||
// Since the only integers smaller than 32 bits we produce are S20 (from | ||
// G_PTRTOINT), the only legal extension is S20 -> S32. | ||
// Extensions to types larger than 64 bits have to be broken down into | ||
// multiple parts. | ||
getActionDefinitionsBuilder({G_ANYEXT, G_SEXT, G_ZEXT}) | ||
.legalFor({{S32, S20}}) | ||
.clampScalar(0, S32, S32); | ||
// FIXME: (s|z|any)ext s20 to s64 is broken. | ||
|
||
getActionDefinitionsBuilder({G_AND, G_OR}) | ||
.legalFor({S32}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S32); | ||
|
||
getActionDefinitionsBuilder(G_XOR) | ||
.legalFor({S32}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S32); | ||
|
||
getActionDefinitionsBuilder(G_SEXT_INREG).custom(); | ||
|
||
getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL}) | ||
.legalFor({{S32, S32}}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S32) | ||
.clampScalar(1, S32, S32); | ||
|
||
getActionDefinitionsBuilder(G_TRUNC).alwaysLegal(); | ||
|
||
getActionDefinitionsBuilder(G_SELECT) | ||
.legalFor({{S32, S32}, {P0, S32}}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S32) | ||
.clampScalar(1, S32, S32); | ||
|
||
getActionDefinitionsBuilder({G_ADD, G_SUB}) | ||
.legalFor({S32}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S32); | ||
|
||
// FIXME: G_SADDE/G_SSUBE doesn't support lowering. To support this properly, | ||
// the action needs to be implemented | ||
getActionDefinitionsBuilder({G_UADDO, G_USUBO, G_UADDE, G_USUBE, G_SADDO, | ||
G_SSUBO, G_SADDE, G_SSUBE, G_UADDSAT, G_USUBSAT, | ||
G_SADDSAT, G_SSUBSAT}) | ||
.lower(); | ||
|
||
// S32 multiplies are legal; S64 multiplies become library calls. | ||
getActionDefinitionsBuilder(G_MUL) | ||
.legalFor({S32}) | ||
.widenScalarToNextPow2(0) | ||
.minScalar(0, S32) | ||
.libcallFor({S64}); | ||
|
||
// FIXME: G_SMULO, G_UMULO support | ||
getActionDefinitionsBuilder({G_UMULH, G_SMULH}).lower(); | ||
|
||
getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_SREM, G_UREM}) | ||
.libcallFor({S32, S64}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S64); | ||
|
||
getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM}) | ||
.lowerFor({S32, S64}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S64); | ||
|
||
getActionDefinitionsBuilder(G_ABS) | ||
.legalFor({S32}) | ||
.lowerFor({S64}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S32); | ||
|
||
// The CLZ instruction implements CTLZ, which also covers CTLZ_ZERO_UNDEF | ||
getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF) | ||
.lowerFor({{S32, S32}}) | ||
.clampScalar(0, S32, S32) | ||
.clampScalar(1, S32, S32); | ||
|
||
getActionDefinitionsBuilder(G_CTLZ) | ||
.legalFor({{S32, S32}}) | ||
.clampScalar(0, S32, S32) | ||
.clampScalar(1, S32, S32); | ||
|
||
getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower(); | ||
|
||
// Queries whose type at index 2 (SizeArg below) is S20 take the custom path; | ||
// every other query falls through to the libcall rule. | ||
getActionDefinitionsBuilder({G_MEMCPY, G_MEMSET, G_MEMMOVE}) | ||
.customIf([=](const LegalityQuery &Query) { | ||
const LLT SizeArg = Query.Types[2]; | ||
return SizeArg == S20; | ||
}) | ||
.libcall(); | ||
|
||
getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom(); | ||
getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower(); | ||
|
||
// Widen to a power of two with a minimum size of 32 bits, then lower. | ||
getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX}) | ||
.widenScalarToNextPow2(0, 32) | ||
.lower(); | ||
|
||
getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({P0}); | ||
|
||
getActionDefinitionsBuilder(G_INTTOPTR) | ||
.legalFor({{P0, S20}}) | ||
.widenScalarToNextPow2(1) | ||
.clampScalar(1, S20, S20); | ||
|
||
getActionDefinitionsBuilder(G_PTRTOINT) | ||
.legalFor({{S20, P0}}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S20, S20); | ||
|
||
// We support pointer arithmetic on both GPRs (32-bits) and pointer regs | ||
// (20-bits, where the scalar addend resides in a MOD register). To allow | ||
// specifying alternative register bank mappings, we need to truncate the RHS | ||
// operand to 20-bits, thus we only allow s20 types for the scalar addend | ||
getActionDefinitionsBuilder(G_PTR_ADD) | ||
.legalFor({{P0, S20}}) | ||
.widenScalarToNextPow2(1) | ||
.clampScalar(1, S20, S20); | ||
|
||
// Each entry below is {value type, pointer type, memory type, alignment}. | ||
// Vector values that are 32 or 64 bits wide are bitcast to a scalar of the | ||
// same size; whatever is left is clamped to S32 or lowered. | ||
getActionDefinitionsBuilder({G_LOAD, G_STORE}) | ||
.legalForTypesWithMemDesc({ | ||
{S32, P0, S8, 8}, {S32, P0, S16, 16}, | ||
{S20, P0, S20, 32}, {S32, P0, S32, 32}, | ||
{P0, P0, S20, 32}, {V16S8, P0, V16S8, 16}, | ||
{V8S16, P0, V8S16, 16}, {V4S32, P0, V4S32, 16}, | ||
{V8S32, P0, V8S32, 32}, {V16S16, P0, V16S16, 32}, | ||
{V32S8, P0, V32S8, 32}, {V16S32, P0, V16S32, 32}, | ||
{V32S16, P0, V32S16, 32}, {V64S8, P0, V64S8, 32}, | ||
{V32S32, P0, V32S32, 32}, {V64S16, P0, V64S16, 32}, | ||
{V128S8, P0, V128S8, 32}, {ACC256, P0, ACC256, 32}, | ||
{ACC512, P0, ACC512, 32}, {ACC1024, P0, ACC1024, 32}, | ||
{S128, P0, S128, 16}, | ||
}) | ||
.widenScalarToNextPow2(0) | ||
.lowerIfMemSizeNotPow2() | ||
.bitcastIf( | ||
[=](const LegalityQuery &Query) { | ||
const LLT &Ty = Query.Types[0]; | ||
return Ty.isVector() && | ||
(Ty.getSizeInBits() == 64 || Ty.getSizeInBits() == 32); | ||
}, | ||
[=](const LegalityQuery &Query) { | ||
const LLT Ty = Query.Types[0]; | ||
const unsigned Size = Ty.getSizeInBits(); | ||
assert(Size % 32 == 0); | ||
return std::pair(0, LLT::scalar(Size)); | ||
}) | ||
.clampScalar(0, S32, S32) | ||
.lower(); | ||
|
||
// FIXME: Storing a pointer to an un-aligned address isn't supported. | ||
getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD}) | ||
.legalForTypesWithMemDesc({{S32, P0, S8, 8}, {S32, P0, S16, 16}}) | ||
.widenScalarToNextPow2(0) | ||
.lowerIfMemSizeNotPow2() | ||
.clampScalar(0, S32, S32) | ||
.lower(); | ||
|
||
// Control-flow | ||
getActionDefinitionsBuilder(G_BRCOND).legalFor({S32}).clampScalar(0, S32, | ||
S32); | ||
|
||
getActionDefinitionsBuilder(G_PHI) | ||
.legalFor({S20, S32, P0}) | ||
.widenScalarToNextPow2(0) | ||
.clampScalar(0, S32, S32); | ||
|
||
getActionDefinitionsBuilder(G_JUMP_TABLE).custom(); | ||
|
||
getActionDefinitionsBuilder(G_BRJT).custom(); | ||
|
||
getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({P0}); | ||
|
||
// Variadic functions | ||
getActionDefinitionsBuilder(G_VASTART).custom(); | ||
getActionDefinitionsBuilder(G_VAARG).custom(); | ||
|
||
// All rules are registered: compute the legacy tables and run verify(). | ||
getLegacyLegalizerInfo().computeTables(); | ||
verify(*ST.getInstrInfo()); | ||
} | ||
|
||
/// Forwards an instruction whose rule requested the custom action to the
/// matching AIELegalizerHelper routine.
///
/// \param Helper       Legalizer helper passed through to the routine.
/// \param MI           Instruction to legalize; its opcode selects the routine.
/// \param LocObserver  Only forwarded to the memory-intrinsic routine.
/// \returns whatever the selected AIELegalizerHelper routine returns.
///
/// Any opcode without a case below hits llvm_unreachable.
bool AIE1LegalizerInfo::legalizeCustom(LegalizerHelper &Helper,
                                       MachineInstr &MI,
                                       LostDebugLocObserver &LocObserver) const {
  using namespace TargetOpcode;

  switch (MI.getOpcode()) {
  // The three memory intrinsics share a single routine.
  case G_MEMCPY:
  case G_MEMMOVE:
  case G_MEMSET:
    return AIEHelper.legalizeMemCalls(Helper, MI, LocObserver);

  // Variadic functions.
  case G_VAARG:
    return AIEHelper.legalizeG_VAARG(Helper, MI);
  case G_VASTART:
    return AIEHelper.legalizeG_VASTART(Helper, MI);

  // Jump tables.
  case G_BRJT:
    return AIEHelper.legalizeG_BRJT(Helper, MI);
  case G_JUMP_TABLE:
    return AIEHelper.legalizeG_JUMP_TABLE(Helper, MI);

  case G_DYN_STACKALLOC:
    return AIEHelper.legalizeG_DYN_STACKALLOC(Helper, MI);
  case G_FCONSTANT:
    return AIEHelper.legalizeG_FCONSTANT(Helper, MI);
  case G_SEXT_INREG:
    return AIEHelper.legalizeG_SEXT_INREG(Helper, MI);

  default:
    llvm_unreachable("Un-expected custom legalization");
  }
}
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
//===- AIE1LegalizerInfo.h --------------------------------------*- C++ -*-===// | ||
// | ||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates | ||
// | ||
//===----------------------------------------------------------------------===// | ||
/// \file | ||
/// This file declares the targeting of the MachineLegalizer class for AIE1. | ||
/// \todo This should be generated by TableGen. | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_LIB_TARGET_AIE_AIE1MACHINELEGALIZER_H | ||
#define LLVM_LIB_TARGET_AIE_AIE1MACHINELEGALIZER_H | ||
|
||
#include "AIELegalizerHelper.h" | ||
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" | ||
#include "llvm/CodeGen/Register.h" | ||
#include "llvm/IR/InstrTypes.h" | ||
|
||
namespace llvm { | ||
|
||
class AIEBaseSubtarget; | ||
|
||
/// This class provides legalization strategies. | ||
class AIE1LegalizerInfo : public LegalizerInfo { | ||
AIELegalizerHelper AIEHelper; | ||
|
||
public: | ||
AIE1LegalizerInfo(const AIEBaseSubtarget &ST); | ||
bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI, | ||
LostDebugLocObserver &LocObserver) const override; | ||
}; | ||
} // end namespace llvm | ||
#endif |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: maybe put these in a separate header with if guards per generation? It would be nice for these to be global in the file, and these definitions will probably overlap quite a bit, so you'd want to include them in more parts of the instruction selection stack.