Xilinx · konstantinschwarz · Aug 21, 2024 · Aug 9, 2024 · Aug 20, 2024 · Aug 20, 2024
@@ -0,0 +1,299 @@
+//===- AIE1LegalizerInfo.cpp ----------------------------------------------===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file implements the targeting of the MachineLegalizer class for AIE1.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#include "AIE1LegalizerInfo.h"
+#include "AIEBaseISelLowering.h"
+#include "AIEBaseSubtarget.h"
+#include "AIELegalizerHelper.h"
+#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGenTypes/LowLevelType.h"
+#include <initializer_list>
+
+using namespace llvm;
+
+/// Constructs the legalizer rule set for AIE1.
+///
+/// For every generic opcode handled here, the builder chains below state which
+/// type combinations are legal as-is and which action (widen, clamp, lower,
+/// bitcast, libcall or custom) is applied to the remaining ones. The rules of
+/// a chain are order-sensitive, so do not reorder them casually.
+///
+/// \param ST Subtarget; forwarded to the AIELegalizerHelper member and used to
+/// obtain the instruction info handed to verify().
+AIE1LegalizerInfo::AIE1LegalizerInfo(const AIEBaseSubtarget &ST)
+    : AIEHelper(ST) {
+  using namespace TargetOpcode;
+  // Scalar types referenced by the rules below.
+  const LLT S8 = LLT::scalar(8);
+  const LLT S16 = LLT::scalar(16);
+  const LLT S20 = LLT::scalar(20);
+  const LLT S32 = LLT::scalar(32);
+  const LLT S64 = LLT::scalar(64);
+  // Pointer type: address space 0, 20 bits wide.
+  const LLT P0 = LLT::pointer(0, 20);
+
+  // 128-bit vectors
+  const LLT V16S8 = LLT::fixed_vector(16, 8);
+  const LLT V8S16 = LLT::fixed_vector(8, 16);
+  const LLT V4S32 = LLT::fixed_vector(4, 32);
+
+  // 256-bit vectors
+  const LLT V8S32 = LLT::fixed_vector(8, 32);
+  const LLT V16S16 = LLT::fixed_vector(16, 16);
+  const LLT V32S8 = LLT::fixed_vector(32, 8);
+
+  // 256-bit accumulators
+  const LLT ACC256 = LLT::fixed_vector(4, 64);
+
+  // 512-bit vectors
+  const LLT V16S32 = LLT::fixed_vector(16, 32);
+  const LLT V32S16 = LLT::fixed_vector(32, 16);
+  const LLT V64S8 = LLT::fixed_vector(64, 8);
+
+  // 512-bit accumulators
+  const LLT ACC512 = LLT::fixed_vector(8, 64);
+
+  // 1024-bit vectors
+  const LLT V32S32 = LLT::fixed_vector(32, 32);
+  const LLT V64S16 = LLT::fixed_vector(64, 16);
+  const LLT V128S8 = LLT::fixed_vector(128, 8);
+
+  // 1024-bit accumulators
+  const LLT ACC1024 = LLT::fixed_vector(16, 64);
+
+  const LLT S128 = LLT::scalar(128);
+
+  // s20, s32, p0 and s128 are accepted directly for G_IMPLICIT_DEF/G_FREEZE.
+  auto &IMPLICIT = getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
+                       .legalFor({S20, S32, P0, S128});
+
+  // Any other scalar is widened to a power of two and clamped to exactly s32.
+  IMPLICIT.widenScalarToNextPow2(0).clampScalar(0, S32, S32);
+
+  getActionDefinitionsBuilder(G_CONSTANT)
+      .legalFor({S20, S32, P0})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  // FIXME: AIE1 actually supports float
+  // Floating-point constants go through legalizeCustom().
+  getActionDefinitionsBuilder(G_FCONSTANT).customFor({S16, S32, S64});
+
+  // Comparisons produce an s32 result from s32 or pointer operands.
+  getActionDefinitionsBuilder(G_ICMP)
+      .legalFor({{S32, S32}, {S32, P0}})
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  // Floating-point arithmetic on s32/s64 is turned into library calls.
+  getActionDefinitionsBuilder({G_FMUL, G_FDIV, G_FADD, G_FSUB, G_FREM})
+      .libcallFor({S32, S64});
+
+  // Since the only integers smaller than 32 bits we produce are S20 (from
+  // G_PTRTOINT), the only legal extension is S20 -> S32.
+  // Extensions to types larger than 64 bits have to be broken down into
+  // multiple parts.
+  getActionDefinitionsBuilder({G_ANYEXT, G_SEXT, G_ZEXT})
+      .legalFor({{S32, S20}})
+      .clampScalar(0, S32, S32);
+  // FIXME: (s|z|any)ext s20 to s64 is broken.
+
+  // Bitwise logic: only s32 is legal; other scalars are brought to s32.
+  getActionDefinitionsBuilder({G_AND, G_OR})
+      .legalFor({S32})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  getActionDefinitionsBuilder(G_XOR)
+      .legalFor({S32})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  // Always handled in legalizeCustom(), regardless of type.
+  getActionDefinitionsBuilder(G_SEXT_INREG).custom();
+
+  // Shifts: both the shifted value (index 0) and the amount (index 1) are s32.
+  getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL})
+      .legalFor({{S32, S32}})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  getActionDefinitionsBuilder(G_TRUNC).alwaysLegal();
+
+  // Selects of s32 or pointer values with an s32 condition are legal.
+  getActionDefinitionsBuilder(G_SELECT)
+      .legalFor({{S32, S32}, {P0, S32}})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  getActionDefinitionsBuilder({G_ADD, G_SUB})
+      .legalFor({S32})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  // FIXME: G_SADDE/G_SSUBE doesn't support lowering. To support this properly,
+  // the action needs to be implemented
+  getActionDefinitionsBuilder({G_UADDO, G_USUBO, G_UADDE, G_USUBE, G_SADDO,
+                               G_SSUBO, G_SADDE, G_SSUBE, G_UADDSAT, G_USUBSAT,
+                               G_SADDSAT, G_SSUBSAT})
+      .lower();
+
+  // s32 multiplies are legal, narrower ones are widened to at least s32 and
+  // s64 multiplies become library calls.
+  getActionDefinitionsBuilder(G_MUL)
+      .legalFor({S32})
+      .widenScalarToNextPow2(0)
+      .minScalar(0, S32)
+      .libcallFor({S64});
+
+  // FIXME: G_SMULO, G_UMULO support
+  getActionDefinitionsBuilder({G_UMULH, G_SMULH}).lower();
+
+  // Division and remainder are library calls for s32 and s64; other scalar
+  // sizes are first brought into the [s32, s64] range.
+  getActionDefinitionsBuilder({G_SDIV, G_UDIV, G_SREM, G_UREM})
+      .libcallFor({S32, S64})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S64);
+
+  getActionDefinitionsBuilder({G_SDIVREM, G_UDIVREM})
+      .lowerFor({S32, S64})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S64);
+
+  getActionDefinitionsBuilder(G_ABS)
+      .legalFor({S32})
+      .lowerFor({S64})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  // The CLZ instruction implements CTLZ, which also covers CTLZ_ZERO_UNDEF
+  getActionDefinitionsBuilder(G_CTLZ_ZERO_UNDEF)
+      .lowerFor({{S32, S32}})
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  getActionDefinitionsBuilder(G_CTLZ)
+      .legalFor({{S32, S32}})
+      .clampScalar(0, S32, S32)
+      .clampScalar(1, S32, S32);
+
+  getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();
+
+  // Memory intrinsics whose size operand (type index 2) is s20 are handled in
+  // legalizeCustom(); every other form becomes a library call.
+  getActionDefinitionsBuilder({G_MEMCPY, G_MEMSET, G_MEMMOVE})
+      .customIf([=](const LegalityQuery &Query) {
+        const LLT SizeArg = Query.Types[2];
+        return SizeArg == S20;
+      })
+      .libcall();
+
+  getActionDefinitionsBuilder(G_DYN_STACKALLOC).custom();
+  getActionDefinitionsBuilder({G_STACKSAVE, G_STACKRESTORE}).lower();
+
+  getActionDefinitionsBuilder({G_SMIN, G_SMAX, G_UMIN, G_UMAX})
+      .widenScalarToNextPow2(0, 32)
+      .lower();
+
+  getActionDefinitionsBuilder({G_FRAME_INDEX, G_GLOBAL_VALUE}).legalFor({P0});
+
+  // Pointer <-> integer conversions only use the 20-bit scalar type.
+  getActionDefinitionsBuilder(G_INTTOPTR)
+      .legalFor({{P0, S20}})
+      .widenScalarToNextPow2(1)
+      .clampScalar(1, S20, S20);
+
+  getActionDefinitionsBuilder(G_PTRTOINT)
+      .legalFor({{S20, P0}})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S20, S20);
+
+  // We support pointer arithmetic on both GPRs (32-bits) and pointer regs
+  // (20-bits, where the scalar addend resides in a MOD register). To allow
+  // specifying alternative register bank mappings, we need to truncate the RHS
+  // operand to 20-bits, thus we only allow s20 types for the scalar addend
+  getActionDefinitionsBuilder(G_PTR_ADD)
+      .legalFor({{P0, S20}})
+      .widenScalarToNextPow2(1)
+      .clampScalar(1, S20, S20);
+
+  // Each entry is {value type, pointer type, memory type, alignment}.
+  // NOTE(review): the alignment field is presumably the minimum alignment in
+  // bits, as in LLVM's TypePairAndMemDesc -- confirm against LegalizerInfo.h.
+  getActionDefinitionsBuilder({G_LOAD, G_STORE})
+      .legalForTypesWithMemDesc({
+          {S32, P0, S8, 8},         {S32, P0, S16, 16},
+          {S20, P0, S20, 32},       {S32, P0, S32, 32},
+          {P0, P0, S20, 32},        {V16S8, P0, V16S8, 16},
+          {V8S16, P0, V8S16, 16},   {V4S32, P0, V4S32, 16},
+          {V8S32, P0, V8S32, 32},   {V16S16, P0, V16S16, 32},
+          {V32S8, P0, V32S8, 32},   {V16S32, P0, V16S32, 32},
+          {V32S16, P0, V32S16, 32}, {V64S8, P0, V64S8, 32},
+          {V32S32, P0, V32S32, 32}, {V64S16, P0, V64S16, 32},
+          {V128S8, P0, V128S8, 32}, {ACC256, P0, ACC256, 32},
+          {ACC512, P0, ACC512, 32}, {ACC1024, P0, ACC1024, 32},
+          {S128, P0, S128, 16},
+      })
+      .widenScalarToNextPow2(0)
+      .lowerIfMemSizeNotPow2()
+      // Vectors that are 32 or 64 bits in total are accessed as a scalar of
+      // the same overall size.
+      .bitcastIf(
+          [=](const LegalityQuery &Query) {
+            const LLT &Ty = Query.Types[0];
+            return Ty.isVector() &&
+                   (Ty.getSizeInBits() == 64 || Ty.getSizeInBits() == 32);
+          },
+          [=](const LegalityQuery &Query) {
+            const LLT Ty = Query.Types[0];
+            const unsigned Size = Ty.getSizeInBits();
+            assert(Size % 32 == 0);
+            return std::pair(0, LLT::scalar(Size));
+          })
+      .clampScalar(0, S32, S32)
+      .lower();
+
+  // FIXME: Storing a pointer to an un-aligned address isn't supported.
+  // Extending loads are legal only for 8- and 16-bit memory into an s32.
+  getActionDefinitionsBuilder({G_ZEXTLOAD, G_SEXTLOAD})
+      .legalForTypesWithMemDesc({{S32, P0, S8, 8}, {S32, P0, S16, 16}})
+      .widenScalarToNextPow2(0)
+      .lowerIfMemSizeNotPow2()
+      .clampScalar(0, S32, S32)
+      .lower();
+
+  // Control-flow
+  getActionDefinitionsBuilder(G_BRCOND).legalFor({S32}).clampScalar(0, S32,
+                                                                    S32);
+
+  getActionDefinitionsBuilder(G_PHI)
+      .legalFor({S20, S32, P0})
+      .widenScalarToNextPow2(0)
+      .clampScalar(0, S32, S32);
+
+  // Jump tables are handled in legalizeCustom().
+  getActionDefinitionsBuilder(G_JUMP_TABLE).custom();
+
+  getActionDefinitionsBuilder(G_BRJT).custom();
+
+  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({P0});
+
+  // Variadic functions
+  getActionDefinitionsBuilder(G_VASTART).custom();
+  getActionDefinitionsBuilder(G_VAARG).custom();
+
+  // Finalize the legacy tables, then run LegalizerInfo's verification with the
+  // subtarget's instruction info.
+  getLegacyLegalizerInfo().computeTables();
+  verify(*ST.getInstrInfo());
+}
+
+/// Routes an instruction whose legalization action is "custom" to the
+/// AIELegalizerHelper routine that implements it.
+///
+/// \param Helper Generic legalizer helper, passed through to the routine.
+/// \param MI Instruction to legalize; its opcode selects the routine.
+/// \param LocObserver Only forwarded for the memory intrinsics
+/// (G_MEMSET, G_MEMCPY, G_MEMMOVE).
+/// \returns the result of the selected helper routine. An opcode without a
+/// handler here is a programming error and hits llvm_unreachable.
+bool AIE1LegalizerInfo::legalizeCustom(
+    LegalizerHelper &Helper, MachineInstr &MI,
+    LostDebugLocObserver &LocObserver) const {
+  using namespace TargetOpcode;
+
+  const unsigned Opcode = MI.getOpcode();
+  switch (Opcode) {
+  // Memory intrinsics share a single entry point.
+  case G_MEMCPY:
+  case G_MEMMOVE:
+  case G_MEMSET:
+    return AIEHelper.legalizeMemCalls(Helper, MI, LocObserver);
+
+  // Variadic argument handling.
+  case G_VAARG:
+    return AIEHelper.legalizeG_VAARG(Helper, MI);
+  case G_VASTART:
+    return AIEHelper.legalizeG_VASTART(Helper, MI);
+
+  // Jump tables.
+  case G_JUMP_TABLE:
+    return AIEHelper.legalizeG_JUMP_TABLE(Helper, MI);
+  case G_BRJT:
+    return AIEHelper.legalizeG_BRJT(Helper, MI);
+
+  // Remaining custom-legalized opcodes.
+  case G_DYN_STACKALLOC:
+    return AIEHelper.legalizeG_DYN_STACKALLOC(Helper, MI);
+  case G_FCONSTANT:
+    return AIEHelper.legalizeG_FCONSTANT(Helper, MI);
+  case G_SEXT_INREG:
+    return AIEHelper.legalizeG_SEXT_INREG(Helper, MI);
+
+  default:
+    llvm_unreachable("Un-expected custom legalization");
+  }
+}
@@ -0,0 +1,37 @@
+//===- AIE1LegalizerInfo.h --------------------------------------*- C++ -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2023-2024 Advanced Micro Devices, Inc. or its affiliates
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares the targeting of the MachineLegalizer class for AIE1.
+/// \todo This should be generated by TableGen.
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AIE_AIE1MACHINELEGALIZER_H
+#define LLVM_LIB_TARGET_AIE_AIE1MACHINELEGALIZER_H
+
+#include "AIELegalizerHelper.h"
+#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
+#include "llvm/CodeGen/Register.h"
+#include "llvm/IR/InstrTypes.h"
+
+namespace llvm {
+
+class AIEBaseSubtarget;
+
+/// Legalization strategies for AIE1, expressed as a LegalizerInfo.
+class AIE1LegalizerInfo : public LegalizerInfo {
+public:
+  /// Builds the legalization rules for the subtarget \p ST.
+  AIE1LegalizerInfo(const AIEBaseSubtarget &ST);
+
+  /// Legalizes \p MI when its legalization action is "custom".
+  bool legalizeCustom(LegalizerHelper &Helper, MachineInstr &MI,
+                      LostDebugLocObserver &LocObserver) const override;
+
+private:
+  /// Helper that the custom legalization routines are delegated to.
+  AIELegalizerHelper AIEHelper;
+};
+} // end namespace llvm
+#endif
@@ -744,6 +744,27 @@ unsigned AIE2InstrInfo::getCycleSeparatorOpcode() const {
   return AIE2::CYCLE_SEPARATOR;
 }
 
+/// \returns AIE2::G_AIE_ADD_VECTOR_ELT_LEFT, the generic opcode AIE2 uses for
+/// the "add vector element" operation.
+unsigned AIE2InstrInfo::getGenericAddVectorEltOpcode() const {
+  const unsigned Opcode = AIE2::G_AIE_ADD_VECTOR_ELT_LEFT;
+  return Opcode;
+}
+
+/// \returns AIE2::G_AIE_INSERT_VECTOR_ELT, the generic opcode AIE2 uses for
+/// the "insert vector element" operation.
+unsigned AIE2InstrInfo::getGenericInsertVectorEltOpcode() const {
+  const unsigned Opcode = AIE2::G_AIE_INSERT_VECTOR_ELT;
+  return Opcode;
+}
+
+/// \param SignExt Selects between the two extract opcodes.
+/// \returns AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT when \p SignExt is true,
+/// AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT otherwise.
+unsigned AIE2InstrInfo::getGenericExtractVectorEltOpcode(bool SignExt) const {
+  if (SignExt)
+    return AIE2::G_AIE_SEXT_EXTRACT_VECTOR_ELT;
+  return AIE2::G_AIE_ZEXT_EXTRACT_VECTOR_ELT;
+}
+
+/// \returns AIE2::G_AIE_PAD_VECTOR_UNDEF, the generic opcode AIE2 uses for
+/// the "pad vector" operation.
+unsigned AIE2InstrInfo::getGenericPadVectorOpcode() const {
+  const unsigned Opcode = AIE2::G_AIE_PAD_VECTOR_UNDEF;
+  return Opcode;
+}
+
+/// \returns AIE2::G_AIE_UNPAD_VECTOR, the generic opcode AIE2 uses for the
+/// "unpad vector" operation.
+unsigned AIE2InstrInfo::getGenericUnpadVectorOpcode() const {
+  const unsigned Opcode = AIE2::G_AIE_UNPAD_VECTOR;
+  return Opcode;
+}
+
 unsigned int getVLDSparseOpcode(unsigned int PseudoOpc) {
   switch (PseudoOpc) {
   case AIE2::PSEUDO_VLD_SPARSE_POP_16_set_low:

@@ -42,6 +42,11 @@ class AIE2InstrInfo : public AIE2GenInstrInfo {
   unsigned getPseudoMoveOpcode() const override;
   unsigned getConstantMovOpcode(MachineRegisterInfo &MRI, unsigned int Reg,
                                 APInt &Val) const override;
+  // Accessors for the AIE2 generic opcodes (AIE2::G_AIE_*) covering vector
+  // element add/insert/extract and vector pad/unpad. For the extract
+  // accessor, SignExt selects the sign-extending opcode over the
+  // zero-extending one.
+  unsigned getGenericAddVectorEltOpcode() const override;
+  unsigned getGenericInsertVectorEltOpcode() const override;
+  unsigned getGenericExtractVectorEltOpcode(bool SignExt) const override;
+  unsigned getGenericPadVectorOpcode() const override;
+  unsigned getGenericUnpadVectorOpcode() const override;
   unsigned getCycleSeparatorOpcode() const override;
   bool isLock(unsigned Opc) const override;
   bool isDelayedSchedBarrier(const MachineInstr &MI) const override;