From 790b237cb7a705c048acf9f46c32e080033fd480 Mon Sep 17 00:00:00 2001 From: Aleksandr Bezzubikov Date: Fri, 5 Jun 2020 03:26:00 +0300 Subject: [PATCH] Add backported function pointers patches --- ...LowerOpenCL-pass-to-handle-new-block.patch | 998 +++++++++++++++ ...ntation-of-SPV_INTEL_function_pointe.patch | 1079 +++++++++++++++++ ...ering-of-constant-expressions-vector.patch | 215 ++++ ...nction-pointers-in-cast-instructions.patch | 98 ++ ...-expressions-lowering-for-function-p.patch | 192 +++ 5 files changed, 2582 insertions(+) create mode 100644 patches/spirv/0006-Backport-Update-LowerOpenCL-pass-to-handle-new-block.patch create mode 100644 patches/spirv/0007-Backport-Implementation-of-SPV_INTEL_function_pointe.patch create mode 100644 patches/spirv/0008-Support-lowering-of-constant-expressions-vector.patch create mode 100644 patches/spirv/0009-Support-function-pointers-in-cast-instructions.patch create mode 100644 patches/spirv/0010-Improve-constant-expressions-lowering-for-function-p.patch diff --git a/patches/spirv/0006-Backport-Update-LowerOpenCL-pass-to-handle-new-block.patch b/patches/spirv/0006-Backport-Update-LowerOpenCL-pass-to-handle-new-block.patch new file mode 100644 index 00000000..34908ff1 --- /dev/null +++ b/patches/spirv/0006-Backport-Update-LowerOpenCL-pass-to-handle-new-block.patch @@ -0,0 +1,998 @@ +From 0c74ecad7f7efa134a70096ce3fbc8e833aedc9e Mon Sep 17 00:00:00 2001 +From: Aleksandr Bezzubikov +Date: Fri, 15 May 2020 23:31:10 +0300 +Subject: [PATCH 06/10] [Backport] Update LowerOpenCL pass to handle new blocks + represntation in LLVM IR Original commit: + https://github.com/KhronosGroup/SPIRV-LLVM-Translator/pull/133/commits/b7b90e0 + +--- + lib/SPIRV/SPIRVLowerOCLBlocks.cpp | 249 +++------------------- + test/global_block.ll | 88 +++----- + test/literal-struct.ll | 31 +-- + test/transcoding/block_w_struct_return.ll | 47 ++-- + test/transcoding/enqueue_kernel.ll | 240 ++++++++++++--------- + 5 files changed, 244 insertions(+), 411 deletions(-) + +diff --git a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp +index 50e1838..7bdfb2f 100644 +--- a/lib/SPIRV/SPIRVLowerOCLBlocks.cpp ++++ b/lib/SPIRV/SPIRVLowerOCLBlocks.cpp +@@ -40,207 +40,34 @@ + // In both cases values with function type used as intermediate representation + // for block literal structure. + // +-// This pass is designed to find such cases and simplify them to avoid any +-// function pointer types occurrences in LLVM IR in 4 steps. +-// +-// 1. Find all function pointer allocas, like +-// %block = alloca void () * +-// +-// Then find a single store to that alloca: +-// %blockLit = alloca <{ i32, i32, ...}>, align 4 +-// %0 = bitcast <{ i32, i32, ... }>* %blockLit to void ()* +-// > store void ()* %0, void ()** %block, align 4 +-// +-// And replace the alloca users by new instructions which used stored value +-// %blockLit itself instead of function pointer alloca %block. +-// +-// 2. Find consecutive casts from block literal type to i8 addrspace(4)* +-// used function pointers as an intermediate type: +-// %0 = bitcast <{ i32, i32 }> %block to void() * +-// %1 = addrspacecast void() * %0 to i8 addrspace(4)* +-// And simplify them: +-// %2 = addrspacecast <{ i32, i32 }> %block to i8 addrspace(4)* +-// +-// 3. Find all unused instructions with function pointer type occured after +-// pp.1-2 and remove them. +-// +-// 4. Find unused globals with function pointer type, like +-// @block = constant void ()* +-// bitcast ({ i32, i32 }* @__block_literal_global to void ()* +-// +-// And remove them. ++// In LLVM IR produced by clang, blocks are represented with the following ++// structure: ++// %struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } ++// Pointers to block invoke functions are stored in the third field. Clang ++// replaces inderect function calls in all cases except if block is passed as a ++// function argument. Note that it is somewhat unclear if the OpenCL C spec ++// should allow passing blocks as function argumernts. This pass is not supposed ++// to work correctly with such functions. ++// Clang though has to store function pointers to this structure. Purpose of ++// this pass is to replace store of function pointers(not allowed in SPIR-V) ++// with null pointers. + // + //===----------------------------------------------------------------------===// + #define DEBUG_TYPE "spv-lower-ocl-blocks" + +-#include "OCLUtil.h" + #include "SPIRVInternal.h" + +-#include "llvm/ADT/SetVector.h" +-#include "llvm/Analysis/ValueTracking.h" +-#include "llvm/IR/GlobalVariable.h" +-#include "llvm/IR/InstIterator.h" + #include "llvm/IR/Module.h" + #include "llvm/Pass.h" +-#include "llvm/PassSupport.h" +-#include "llvm/Support/Casting.h" ++#include "llvm/Support/Regex.h" + + using namespace llvm; + + namespace { + +-static void +-removeUnusedFunctionPtrInst(Instruction *I, +- SmallSetVector &FuncPtrInsts) { +- for (unsigned OpIdx = 0, Ops = I->getNumOperands(); OpIdx != Ops; ++OpIdx) { +- Instruction *OpI = dyn_cast(I->getOperand(OpIdx)); +- I->setOperand(OpIdx, nullptr); +- if (OpI && OpI != I && OpI->user_empty()) +- FuncPtrInsts.insert(OpI); +- } +- I->eraseFromParent(); +-} +- +-static bool isFuncPtrAlloca(const AllocaInst *AI) { +- auto *ET = dyn_cast(AI->getAllocatedType()); +- return ET && ET->getElementType()->isFunctionTy(); +-} +- +-static bool hasFuncPtrType(const Value *V) { +- auto *PT = dyn_cast(V->getType()); +- return PT && PT->getElementType()->isFunctionTy(); +-} +- +-static bool isFuncPtrInst(const Instruction *I) { +- if (auto *AI = dyn_cast(I)) +- return isFuncPtrAlloca(AI); +- +- for (auto &Op : I->operands()) { +- if (auto *AI = dyn_cast(Op)) +- return isFuncPtrAlloca(AI); +- +- auto *OpI = dyn_cast(&Op); +- if (OpI && OpI != I && hasFuncPtrType(OpI)) +- return true; +- } +- return false; +-} +- +-static StoreInst *findSingleStore(AllocaInst *AI) { +- StoreInst *Store = nullptr; +- for (auto *U : AI->users()) { +- if (!isa(U)) +- continue; // not a store +- if (Store) +- return nullptr; // there are more than one stores +- Store = dyn_cast(U); +- } +- return Store; +-} +- +-static void fixFunctionPtrAllocaUsers(AllocaInst *AI) { +- // Find and remove a single store to alloca +- auto *SingleStore = findSingleStore(AI); +- assert(SingleStore && "More than one store to the function pointer alloca"); +- auto *StoredVal = SingleStore->getValueOperand(); +- SingleStore->eraseFromParent(); +- +- // Find loads from the alloca and replace thier users +- for (auto *U : AI->users()) { +- auto *LI = dyn_cast(U); +- if (!LI) +- continue; +- +- for (auto *U : LI->users()) { +- auto *UInst = cast(U); +- auto *Cast = CastInst::CreatePointerBitCastOrAddrSpaceCast( +- StoredVal, UInst->getType(), "", UInst); +- UInst->replaceAllUsesWith(Cast); +- } +- } +-} +- +-static int getBlockLiteralIdx(const Function &F) { +- StringRef FName = F.getName(); +- if (isEnqueueKernelBI(FName)) +- return FName.contains("events") ? 7 : 4; +- if (isKernelQueryBI(FName)) +- return FName.contains("for_ndrange") ? 2 : 1; +- if (FName.startswith("__") && FName.contains("_block_invoke")) +- return F.hasStructRetAttr() ? 1 : 0; +- +- return -1; // No block literal argument +-} +- +-static bool hasBlockLiteralArg(const Function &F) { +- return getBlockLiteralIdx(F) != -1; +-} +- +-static bool simplifyFunctionPtrCasts(Function &F) { +- bool Changed = false; +- int BlockLiteralIdx = getBlockLiteralIdx(F); +- for (auto *U : F.users()) { +- auto *Call = dyn_cast(U); +- if (!Call) +- continue; +- if (Call->getFunction()->getName() == F.getName().str() + "_kernel") +- continue; // Skip block invoke function calls inside block invoke kernels +- +- const DataLayout &DL = F.getParent()->getDataLayout(); +- auto *BlockLiteral = Call->getOperand(BlockLiteralIdx); +- auto *BlockLiteralVal = GetUnderlyingObject(BlockLiteral, DL); +- if (isa(BlockLiteralVal)) +- continue; // nothing to do with globals +- +- auto *BlockLiteralAlloca = cast(BlockLiteralVal); +- assert(!BlockLiteralAlloca->getAllocatedType()->isFunctionTy() && +- "Function type shouldn't be there"); +- +- auto *NewBlockLiteral = CastInst::CreatePointerBitCastOrAddrSpaceCast( +- BlockLiteralAlloca, BlockLiteral->getType(), "", Call); +- BlockLiteral->replaceAllUsesWith(NewBlockLiteral); +- Changed |= true; +- } +- return Changed; +-} +- +-static void +-findFunctionPtrAllocas(Module &M, +- SmallVectorImpl &FuncPtrAllocas) { +- for (auto &F : M) { +- if (F.isDeclaration()) +- continue; +- for (auto &I : instructions(F)) { +- auto *AI = dyn_cast(&I); +- if (!AI || !isFuncPtrAlloca(AI)) +- continue; +- FuncPtrAllocas.push_back(AI); +- } +- } +-} +- +-static void +-findUnusedFunctionPtrInsts(Module &M, +- SmallSetVector &FuncPtrInsts) { +- for (auto &F : M) { +- if (F.isDeclaration()) +- continue; +- for (auto &I : instructions(F)) +- if (I.user_empty() && isFuncPtrInst(&I)) +- FuncPtrInsts.insert(&I); +- } +-} +- +-static void +-findUnusedFunctionPtrGlbs(Module &M, +- SmallVectorImpl &FuncPtrGlbs) { +- for (auto &GV : M.globals()) { +- if (!GV.user_empty()) +- continue; +- auto *GVType = dyn_cast(GV.getType()->getElementType()); +- if (GVType && GVType->getElementType()->isFunctionTy()) +- FuncPtrGlbs.push_back(&GV); +- } ++static bool isBlockInvoke(Function &F) { ++ static Regex BlockInvokeRegex("_block_invoke_?[0-9]*$"); ++ return BlockInvokeRegex.match(F.getName()); + } + + class SPIRVLowerOCLBlocks : public ModulePass { +@@ -250,44 +77,24 @@ public: + + bool runOnModule(Module &M) { + bool Changed = false; +- +- // 1. Find function pointer allocas and fix their users +- SmallVector FuncPtrAllocas; +- findFunctionPtrAllocas(M, FuncPtrAllocas); +- +- Changed |= !FuncPtrAllocas.empty(); +- for (auto *AI : FuncPtrAllocas) +- fixFunctionPtrAllocaUsers(AI); +- +- // 2. Simplify consecutive casts which use function pointer types +- for (auto &F : M) +- if (hasBlockLiteralArg(F)) +- Changed |= simplifyFunctionPtrCasts(F); +- +- // 3. Cleanup unused instructions with function pointer type +- // which are occured after pp. 1-2 +- SmallSetVector FuncPtrInsts; +- findUnusedFunctionPtrInsts(M, FuncPtrInsts); +- +- Changed |= !FuncPtrInsts.empty(); +- while (!FuncPtrInsts.empty()) { +- Instruction *I = FuncPtrInsts.pop_back_val(); +- removeUnusedFunctionPtrInst(I, FuncPtrInsts); ++ for (Function &F : M) { ++ if (!isBlockInvoke(F)) ++ continue; ++ for (User *U : F.users()) { ++ if (!isa(U)) ++ continue; ++ Constant *Null = Constant::getNullValue(U->getType()); ++ if (U != Null) { ++ U->replaceAllUsesWith(Null); ++ Changed = true; ++ } ++ } + } +- +- // 4. Find and remove unused global variables with function pointer type +- SmallVector FuncPtrGlbs; +- findUnusedFunctionPtrGlbs(M, FuncPtrGlbs); +- +- Changed |= !FuncPtrGlbs.empty(); +- for (auto *GV : FuncPtrGlbs) +- GV->eraseFromParent(); +- + return Changed; + } + + static char ID; +-}; // class SPIRVLowerOCLBlocks ++}; + + char SPIRVLowerOCLBlocks::ID = 0; + +diff --git a/test/global_block.ll b/test/global_block.ll +index d1ede65..efb4cf3 100644 +--- a/test/global_block.ll ++++ b/test/global_block.ll +@@ -16,84 +16,66 @@ + ; RUN: llvm-spirv %t.bc -o %t.spv + ; RUN: llvm-spirv -r %t.spv -o - | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM + +-target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" ++target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" + target triple = "spir-unknown-unknown" + + ; CHECK-SPIRV: Name [[block_invoke:[0-9]+]] "_block_invoke" + ; CHECK-SPIRV: TypeInt [[int:[0-9]+]] 32 ++; CHECK-SPIRV: TypeInt [[int8:[0-9]+]] 8 + ; CHECK-SPIRV: Constant [[int]] [[five:[0-9]+]] 5 +-; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int]] +-;; Check that block invoke function has no block descriptor argument in SPIR-V +-; CHECK-SPIRV-NOT: TypeFunction [[block_invoke_type]] [[int]] {{[0-9]+}} [[int]] ++; CHECK-SPIRV: TypePointer [[int8Ptr:[0-9]+]] 8 [[int8]] ++; CHECK-SPIRV: TypeFunction [[block_invoke_type:[0-9]+]] [[int]] [[int8Ptr]] [[int]] + +-;; This variable is not needed in SPIRV +-; CHECK-SPIRV-NOT: Name {{[0-9]+}} block_kernel.b1 +-; CHECK-LLVM-NOT: @block_kernel.b1 +-@block_kernel.b1 = internal addrspace(2) constant i32 (i32) addrspace(4)* addrspacecast (i32 (i32) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i32 (i32) addrspace(1)*) to i32 (i32) addrspace(4)*), align 8 ++%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + +-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 ++@block_kernel.b1 = internal addrspace(2) constant %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), align 4 ++@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (i32 (i8 addrspace(4)*, i32)* @_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 + +-; Function Attrs: convergent nounwind +-define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { ++; Function Attrs: convergent noinline nounwind optnone ++define spir_kernel void @block_kernel(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { + entry: +- %res.addr = alloca i32 addrspace(1)*, align 8 +- store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 +- +-; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] [[five]] +-; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i32 5) +- %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 +- +- %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8, !tbaa !10 +- store i32 %call, i32 addrspace(1)* %0, align 4, !tbaa !14 ++ %res.addr = alloca i32 addrspace(1)*, align 4 ++ store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 4 ++; CHECK-SPIRV: FunctionCall [[int]] {{[0-9]+}} [[block_invoke]] {{[0-9]+}} [[five]] ++; CHECK-LLVM: %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 5) ++ %call = call spir_func i32 @_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 5) #2 ++ %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 4 ++ store i32 %call, i32 addrspace(1)* %0, align 4 + ret void + } + +-; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 0 [[block_invoke_type]] ++; CHECK-SPIRV: 5 Function [[int]] [[block_invoke]] 2 [[block_invoke_type]] ++; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int8Ptr]] {{[0-9]+}} + ; CHECK-SPIRV-NEXT: 3 FunctionParameter [[int]] {{[0-9]+}} +-; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i32 %i) +-; Function Attrs: convergent nounwind ++; CHECK-LLVM: define internal spir_func i32 @_block_invoke(i8 addrspace(4)* {{.*}}, i32 %{{.*}}) ++; Function Attrs: convergent noinline nounwind optnone + define internal spir_func i32 @_block_invoke(i8 addrspace(4)* %.block_descriptor, i32 %i) #1 { + entry: +- %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 ++ %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 + %i.addr = alloca i32, align 4 +- store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 +- +-;; Instruction below is useless and should be removed. +-; CHECK-SPIRV-NOT: Bitcast +-; CHECK-LLVM-NOT: bitcast +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* +- store i32 %i, i32* %i.addr, align 4, !tbaa !14 +- %0 = load i32, i32* %i.addr, align 4, !tbaa !14 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 ++ store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* ++ store i32 %i, i32* %i.addr, align 4 ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 ++ %0 = load i32, i32* %i.addr, align 4 + %add = add nsw i32 %0, 1 + ret i32 %add + } + +-attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +-attributes #1 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #2 = { convergent } + + !llvm.module.flags = !{!0} +-!opencl.enable.FP_CONTRACT = !{} + !opencl.ocl.version = !{!1} + !opencl.spir.version = !{!1} +-!opencl.used.extensions = !{!2} +-!opencl.used.optional.core.features = !{!2} +-!opencl.compiler.options = !{!2} +-!llvm.ident = !{!3} ++!llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 2, i32 0} +-!2 = !{} +-!3 = !{!"clang version 7.0.0"} +-!4 = !{i32 1} +-!5 = !{!"none"} +-!6 = !{!"int*"} +-!7 = !{!""} +-!8 = !{i1 false} +-!9 = !{i32 0} +-!10 = !{!11, !11, i64 0} +-!11 = !{!"any pointer", !12, i64 0} +-!12 = !{!"omnipotent char", !13, i64 0} +-!13 = !{!"Simple C/C++ TBAA"} +-!14 = !{!15, !15, i64 0} +-!15 = !{!"int", !12, i64 0} ++!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} ++!3 = !{i32 1} ++!4 = !{!"none"} ++!5 = !{!"int*"} ++!6 = !{!""} +diff --git a/test/literal-struct.ll b/test/literal-struct.ll +index c52170a..52a731a 100644 +--- a/test/literal-struct.ll ++++ b/test/literal-struct.ll +@@ -2,7 +2,7 @@ + ; structs, i.e. structs whose type has no name. Typicaly clang generate such + ; structs if the kernel contains OpenCL 2.0 blocks. The IR was produced with + ; the following command: +-; clang -cc1 -triple spir -cl-std=cl2.0 -O0 -finclude-default-header literal-struct.cl -emit-llvm -o test/literal-struct.ll ++; clang -cc1 -triple spir -cl-std=cl2.0 -O0 literal-struct.cl -emit-llvm -o test/literal-struct.ll + + ; literal-struct.cl: + ; void foo() +@@ -14,25 +14,28 @@ + ; RUN: llvm-as < %s | llvm-spirv -spirv-text -o %t + ; RUN: FileCheck < %t %s + +-; CHECK-DAG: TypeInt [[Int:[0-9]+]] 32 0 +-; CHECK-DAG: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] {{$}} ++; CHECK: TypeInt [[Int:[0-9]+]] 32 0 ++; CHECK: TypeInt [[Int8:[0-9]+]] 8 0 ++; CHECK: TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8]] ++; CHECK: TypeStruct [[StructType:[0-9]+]] [[Int]] [[Int]] [[Int8Ptr]] + + target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" + target triple = "spir" + +-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 ++%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } ++ ++@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__foo_block_invoke to i8*) to i8 addrspace(4)*) }, align 4 + ; CHECK: ConstantComposite [[StructType]] + +-; This is artificial case is added to cover ConstantNull instrucitions with TypeStruct. +-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } zeroinitializer, align 4 ++@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } zeroinitializer, align 4 + ; CHECK: ConstantNull [[StructType]] + + ; Function Attrs: convergent noinline nounwind optnone + define spir_func void @foo() #0 { + entry: +- %myBlock = alloca void () addrspace(4)*, align 4 +- store void () addrspace(4)* addrspacecast (void () addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void () addrspace(1)*) to void () addrspace(4)*), void () addrspace(4)** %myBlock, align 4 +- call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 ++ %myBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 4 ++ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %myBlock, align 4 ++ call spir_func void @__foo_block_invoke(i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*)) #1 + ret void + } + +@@ -40,14 +43,14 @@ entry: + define internal spir_func void @__foo_block_invoke(i8 addrspace(4)* %.block_descriptor) #0 { + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 +- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* +- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 + ret void + } + +-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { convergent } + + !llvm.module.flags = !{!0} +@@ -57,4 +60,4 @@ attributes #1 = { convergent } + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 2, i32 0} +-!2 = !{!"clang version 8.0.0 "} ++!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} +diff --git a/test/transcoding/block_w_struct_return.ll b/test/transcoding/block_w_struct_return.ll +index 76e29f0..df89b13 100644 +--- a/test/transcoding/block_w_struct_return.ll ++++ b/test/transcoding/block_w_struct_return.ll +@@ -16,6 +16,8 @@ + ; res[tid] = kernelBlock(aa).a - 6; + ; } + ++; clang -cc1 -triple spir -cl-std=cl2.0 -disable-llvm-passes -finclude-default-header block_w_struct_return.cl -emit-llvm -o test/transcoding/block_w_struct_return.ll ++ + ; RUN: llvm-as %s -o %t.bc + ; RUN: llvm-spirv %t.bc -spirv-text -o %t.spv.txt + ; RUN: FileCheck < %t.spv.txt %s --check-prefix=CHECK-SPIRV +@@ -27,12 +29,14 @@ + ; CHECK-SPIRV: Name [[BlockInv:[0-9]+]] "__block_ret_struct_block_invoke" + + ; CHECK-SPIRV: 4 TypeInt [[IntTy:[0-9]+]] 32 ++; CHECK-SPIRV: 4 TypeInt [[Int8Ty:[0-9]+]] 8 ++; CHECK-SPIRV: 4 TypePointer [[Int8Ptr:[0-9]+]] 8 [[Int8Ty]] + ; CHECK-SPIRV: 3 TypeStruct [[StructTy:[0-9]+]] [[IntTy]] + ; CHECK-SPIRV: 4 TypePointer [[StructPtrTy:[0-9]+]] 7 [[StructTy]] + + ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructArg:[0-9]+]] 7 + ; CHECK-SPIRV: 4 Variable [[StructPtrTy]] [[StructRet:[0-9]+]] 7 +-; CHECK-SPIRV: 4 PtrCastToGeneric {{[0-9]+}} [[BlockLit:[0-9]+]] {{[0-9]+}} ++; CHECK-SPIRV: 4 PtrCastToGeneric [[Int8Ptr]] [[BlockLit:[0-9]+]] {{[0-9]+}} + ; CHECK-SPIRV: 7 FunctionCall {{[0-9]+}} {{[0-9]+}} [[BlockInv]] [[StructRet]] [[BlockLit]] [[StructArg]] + + ; CHECK-LLVM: %[[StructA:.*]] = type { i32 } +@@ -41,20 +45,21 @@ + target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" + target triple = "spir64-unknown-unknown" + ++%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + %struct.A = type { i32 } + +-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 ++@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 16, i32 8, i8 addrspace(4)* addrspacecast (i8* bitcast (void (%struct.A*, i8 addrspace(4)*, %struct.A*)* @__block_ret_struct_block_invoke to i8*) to i8 addrspace(4)*) }, align 8 + + ; Function Attrs: convergent noinline nounwind optnone +-define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 !kernel_arg_host_accessible !8 !kernel_arg_pipe_depth !9 !kernel_arg_pipe_io !7 !kernel_arg_buffer_location !7 { ++define spir_kernel void @block_ret_struct(i32 addrspace(1)* %res) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { + entry: + %res.addr = alloca i32 addrspace(1)*, align 8 +- %kernelBlock = alloca void (%struct.A*, %struct.A*) addrspace(4)*, align 8 ++ %kernelBlock = alloca %struct.__opencl_block_literal_generic addrspace(4)*, align 8 + %tid = alloca i64, align 8 + %aa = alloca %struct.A, align 4 + %tmp = alloca %struct.A, align 4 + store i32 addrspace(1)* %res, i32 addrspace(1)** %res.addr, align 8 +- store void (%struct.A*, %struct.A*) addrspace(4)* addrspacecast (void (%struct.A*, %struct.A*) addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to void (%struct.A*, %struct.A*) addrspace(1)*) to void (%struct.A*, %struct.A*) addrspace(4)*), void (%struct.A*, %struct.A*) addrspace(4)** %kernelBlock, align 8 ++ store %struct.__opencl_block_literal_generic addrspace(4)* addrspacecast (%struct.__opencl_block_literal_generic addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to %struct.__opencl_block_literal_generic addrspace(1)*) to %struct.__opencl_block_literal_generic addrspace(4)*), %struct.__opencl_block_literal_generic addrspace(4)** %kernelBlock, align 8 + %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4 + store i64 %call, i64* %tid, align 8 + %0 = load i32 addrspace(1)*, i32 addrspace(1)** %res.addr, align 8 +@@ -63,7 +68,7 @@ entry: + store i32 -1, i32 addrspace(1)* %arrayidx, align 4 + %a = getelementptr inbounds %struct.A, %struct.A* %aa, i32 0, i32 0 + store i32 5, i32* %a, align 4 +- call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 ++ call spir_func void @__block_ret_struct_block_invoke(%struct.A* sret %tmp, i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), %struct.A* byval align 4 %aa) #5 + %a1 = getelementptr inbounds %struct.A, %struct.A* %tmp, i32 0, i32 0 + %2 = load i32, i32* %a1, align 4 + %sub = sub nsw i32 %2, 6 +@@ -78,10 +83,10 @@ entry: + define internal spir_func void @__block_ret_struct_block_invoke(%struct.A* noalias sret %agg.result, i8 addrspace(4)* %.block_descriptor, %struct.A* byval align 4 %a) #1 { + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 8 +- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 8 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 8 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 8 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* +- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 8 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 8 + %a1 = getelementptr inbounds %struct.A, %struct.A* %a, i32 0, i32 0 + store i32 6, i32* %a1, align 4 + %0 = bitcast %struct.A* %agg.result to i8* +@@ -96,30 +101,22 @@ declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture r + ; Function Attrs: convergent nounwind readnone + declare spir_func i64 @_Z13get_global_idj(i32) #3 + +-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } +-attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #2 = { argmemonly nounwind } + attributes #3 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #4 = { convergent nounwind readnone } + attributes #5 = { convergent } + + !llvm.module.flags = !{!0} +-!opencl.enable.FP_CONTRACT = !{} + !opencl.ocl.version = !{!1} + !opencl.spir.version = !{!1} +-!opencl.used.extensions = !{!2} +-!opencl.used.optional.core.features = !{!2} +-!opencl.compiler.options = !{!2} +-!llvm.ident = !{!3} ++!llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 2, i32 0} +-!2 = !{} +-!3 = !{!"clang version 7.0.0"} +-!4 = !{i32 1} +-!5 = !{!"none"} +-!6 = !{!"int*"} +-!7 = !{!""} +-!8 = !{i1 false} +-!9 = !{i32 0} +- ++!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} ++!3 = !{i32 1} ++!4 = !{!"none"} ++!5 = !{!"int*"} ++!6 = !{!""} +diff --git a/test/transcoding/enqueue_kernel.ll b/test/transcoding/enqueue_kernel.ll +index 0d29c71..64fe5b0 100644 +--- a/test/transcoding/enqueue_kernel.ll ++++ b/test/transcoding/enqueue_kernel.ll +@@ -51,11 +51,12 @@ + ; ModuleID = 'enqueue_kernel.cl' + source_filename = "enqueue_kernel.cl" + target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" +-target triple = "spir-unknown-unknown" ++target triple = "spir" + + %opencl.queue_t = type opaque + %struct.ndrange_t = type { i32 } + %opencl.clk_event_t = type opaque ++%struct.__opencl_block_literal_generic = type { i32, i32, i8 addrspace(4)* } + + ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer1:[0-9]+]] "__device_side_enqueue_block_invoke_kernel" + ; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[BlockKer2:[0-9]+]] "__device_side_enqueue_block_invoke_2_kernel" +@@ -66,89 +67,123 @@ target triple = "spir-unknown-unknown" + + ; CHECK-SPIRV: TypeInt [[Int32Ty:[0-9]+]] 32 + ; CHECK-SPIRV: TypeInt [[Int8Ty:[0-9]+]] 8 +-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 + ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt0:[0-9]+]] 0 +-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 17 ++; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt17:[0-9]+]] 21 + ; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt2:[0-9]+]] 2 +-; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 20 +-; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] ++; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt8:[0-9]+]] 8 ++; CHECK-SPIRV: Constant [[Int32Ty]] [[ConstInt20:[0-9]+]] 24 + +-; CHECK-SPIRV: TypePointer {{[0-9]+}} 7 {{[0-9]+}} ++; CHECK-SPIRV-DAG: TypePointer {{[0-9]+}} 7 {{[0-9]+}} ++; CHECK-SPIRV-DAG: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] ++; CHECK-SPIRV: TypeVoid [[VoidTy:[0-9]+]] + ; CHECK-SPIRV: TypePointer [[Int32LocPtrTy:[0-9]+]] 7 [[Int32Ty]] + ; CHECK-SPIRV: TypeDeviceEvent [[EventTy:[0-9]+]] +-; CHECK-SPIRV: TypePointer [[Int8PtrGenTy:[0-9]+]] 8 [[Int8Ty]] + ; CHECK-SPIRV: TypePointer [[EventPtrTy:[0-9]+]] 8 [[EventTy]] + ; CHECK-SPIRV: TypeFunction [[BlockTy1:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] + ; CHECK-SPIRV: TypeFunction [[BlockTy2:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] + ; CHECK-SPIRV: TypeFunction [[BlockTy3:[0-9]+]] [[VoidTy]] [[Int8PtrGenTy]] + ; CHECK-SPIRV: ConstantNull [[EventPtrTy]] [[EventNull:[0-9]+]] + +-; CHECK-LLVM: [[BlockTy1:%[0-9]+]] = type { i32, i32 } +-; CHECK-LLVM: [[BlockTy2:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i8 }> +-; CHECK-LLVM: [[BlockTy3:%[0-9]+]] = type <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> +-; CHECK-LLVM: [[BlockTy4:%[0-9]+]] = type <{ i32, i32 }> ++; CHECK-LLVM: [[BlockTy1:%[0-9a-z\.]+]] = type { i32, i32, i8 addrspace(4)* } ++; CHECK-LLVM: [[BlockTy2:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> ++; CHECK-LLVM: [[BlockTy3:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> ++; CHECK-LLVM: [[BlockTy4:%[0-9a-z\.]+]] = type <{ i32, i32, i8 addrspace(4)* }> + +-; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 +-; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 8, i32 4 }, align 4 ++; CHECK-LLVM: @__block_literal_global = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 ++; CHECK-LLVM: @__block_literal_global.1 = internal addrspace(1) constant [[BlockTy1]] { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* null to i8 addrspace(4)*) }, align 4 + +-@__block_literal_global = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 +-@__block_literal_global.1 = internal addrspace(1) constant { i32, i32 } { i32 8, i32 4 }, align 4 ++@__block_literal_global = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3 to i8*) to i8 addrspace(4)*) }, align 4 ++@__block_literal_global.1 = internal addrspace(1) constant { i32, i32, i8 addrspace(4)* } { i32 12, i32 4, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4 to i8*) to i8 addrspace(4)*) }, align 4 + + ; Function Attrs: convergent noinline nounwind optnone +-define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { ++define spir_kernel void @device_side_enqueue(i32 addrspace(1)* %a, i32 addrspace(1)* %b, i32 %i, i8 signext %c0) #0 !kernel_arg_addr_space !3 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { + entry: ++ %a.addr = alloca i32 addrspace(1)*, align 4 ++ %b.addr = alloca i32 addrspace(1)*, align 4 ++ %i.addr = alloca i32, align 4 ++ %c0.addr = alloca i8, align 1 + %default_queue = alloca %opencl.queue_t*, align 4 + %flags = alloca i32, align 4 + %ndrange = alloca %struct.ndrange_t, align 4 + %clk_event = alloca %opencl.clk_event_t*, align 4 + %event_wait_list = alloca %opencl.clk_event_t*, align 4 + %event_wait_list2 = alloca [1 x %opencl.clk_event_t*], align 4 +- %block = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, align 4 +- %block3 = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 ++ %block = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, align 4 ++ %tmp = alloca %struct.ndrange_t, align 4 ++ %block3 = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, align 4 ++ %tmp4 = alloca %struct.ndrange_t, align 4 + %c = alloca i8, align 1 ++ %tmp11 = alloca %struct.ndrange_t, align 4 ++ %block_sizes = alloca [1 x i32], align 4 ++ %tmp12 = alloca %struct.ndrange_t, align 4 ++ %block_sizes13 = alloca [3 x i32], align 4 ++ store i32 addrspace(1)* %a, i32 addrspace(1)** %a.addr, align 4 ++ store i32 addrspace(1)* %b, i32 addrspace(1)** %b.addr, align 4 ++ store i32 %i, i32* %i.addr, align 4 ++ store i8 %c0, i8* %c0.addr, align 1 + store i32 0, i32* %flags, align 4 + %arrayinit.begin = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 + %0 = load %opencl.clk_event_t*, %opencl.clk_event_t** %clk_event, align 4 + store %opencl.clk_event_t* %0, %opencl.clk_event_t** %arrayinit.begin, align 4 + %1 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 + %2 = load i32, i32* %flags, align 4 +- %block.size = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 +- store i32 17, i32* %block.size, align 4 +- %block.align = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 ++ %3 = bitcast %struct.ndrange_t* %tmp to i8* ++ %4 = bitcast %struct.ndrange_t* %ndrange to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %3, i8* align 4 %4, i32 4, i1 false) ++ %block.size = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 0 ++ store i32 21, i32* %block.size, align 4 ++ %block.align = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 1 + store i32 4, i32* %block.align, align 4 +- %block.captured = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 +- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured, align 4 +- %block.captured1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 +- store i32 %i, i32* %block.captured1, align 4 +- %block.captured2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 +- store i8 %c0, i8* %block.captured2, align 4 +- %3 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i8 }>* %block to void ()* +- %4 = addrspacecast void ()* %3 to i8 addrspace(4)* ++ %block.invoke = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 2 ++ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke, align 4 ++ %block.captured = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 3 ++ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 ++ store i32 addrspace(1)* %5, i32 addrspace(1)** %block.captured, align 4 ++ %block.captured1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 4 ++ %6 = load i32, i32* %i.addr, align 4 ++ store i32 %6, i32* %block.captured1, align 4 ++ %block.captured2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block, i32 0, i32 5 ++ %7 = load i8, i8* %c0.addr, align 1 ++ store i8 %7, i8* %block.captured2, align 4 ++ %8 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>* %block to %struct.__opencl_block_literal_generic* ++ %9 = addrspacecast %struct.__opencl_block_literal_generic* %8 to i8 addrspace(4)* + + ; CHECK-SPIRV: PtrCastToGeneric [[Int8PtrGenTy]] [[BlockLit1:[0-9]+]] + ; CHECK-SPIRV: EnqueueKernel [[Int32Ty]] {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} {{[0-9]+}} + ; [[ConstInt0]] [[EventNull]] [[EventNull]] + ; [[BlockKer1]] [[BlockLit1]] [[ConstInt17]] [[ConstInt8]] + +-; CHECK-LLVM: [[Block2:%[0-9]+]] = addrspacecast [[BlockTy2]]* %block to i8 addrspace(4)* ++; CHECK-LLVM: [[Block2:%[0-9]+]] = bitcast [[BlockTy2]]* %block to %struct.__opencl_block_literal_generic* ++; CHECK-LLVM: [[Block2Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block2]] to i8 addrspace(4)* + ; CHECK-LLVM: [[BlockInv2:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8 addrspace(4)* +-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2]]) +- +- %5 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %4) +- %6 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* +- %7 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* +- %block.size5 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 +- store i32 20, i32* %block.size5, align 4 +- %block.align6 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 ++; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv2]], i8 addrspace(4)* [[Block2Ptr]]) ++ ++ %10 = call i32 @__enqueue_kernel_basic(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* byval %tmp, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) ++ %11 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 ++ %12 = load i32, i32* %flags, align 4 ++ %13 = bitcast %struct.ndrange_t* %tmp4 to i8* ++ %14 = bitcast %struct.ndrange_t* %ndrange to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %13, i8* align 4 %14, i32 4, i1 false) ++ %15 = addrspacecast %opencl.clk_event_t** %event_wait_list to %opencl.clk_event_t* addrspace(4)* ++ %16 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* ++ %block.size5 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 0 ++ store i32 24, i32* %block.size5, align 4 ++ %block.align6 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 1 + store i32 4, i32* %block.align6, align 4 +- %block.captured7 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 +- store i32 addrspace(1)* %a, i32 addrspace(1)** %block.captured7, align 4 +- %block.captured8 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 +- store i32 %i, i32* %block.captured8, align 4 +- %block.captured9 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 +- store i32 addrspace(1)* %b, i32 addrspace(1)** %block.captured9, align 4 +- %8 = bitcast <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to void ()* +- %9 = addrspacecast void ()* %8 to i8 addrspace(4)* ++ %block.invoke7 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 2 ++ store i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2 to i8*) to i8 addrspace(4)*), i8 addrspace(4)** %block.invoke7, align 4 ++ %block.captured8 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 3 ++ %17 = load i32 addrspace(1)*, i32 addrspace(1)** %a.addr, align 4 ++ store i32 addrspace(1)* %17, i32 addrspace(1)** %block.captured8, align 4 ++ %block.captured9 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 4 ++ %18 = load i32, i32* %i.addr, align 4 ++ store i32 %18, i32* %block.captured9, align 4 ++ %block.captured10 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3, i32 0, i32 5 ++ %19 = load i32 addrspace(1)*, i32 addrspace(1)** %b.addr, align 4 ++ store i32 addrspace(1)* %19, i32 addrspace(1)** %block.captured10, align 4 ++ %20 = bitcast <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>* %block3 to %struct.__opencl_block_literal_generic* ++ %21 = addrspacecast %struct.__opencl_block_literal_generic* %20 to i8 addrspace(4)* ++ + + ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event1:[0-9]+]] + ; CHECK-SPIRV: PtrCastToGeneric [[EventPtrTy]] [[Event2:[0-9]+]] +@@ -158,16 +193,24 @@ entry: + ; [[ConstInt2]] [[Event1]] [[Event2]] + ; [[BlockKer2]] [[BlockLit2]] [[ConstInt20]] [[ConstInt8]] + +-; CHECK-LLVM: [[Block3:%[0-9]+]] = addrspacecast [[BlockTy3]]* %block3 to i8 addrspace(4)* ++; CHECK-LLVM: [[Block3:%[0-9]+]] = bitcast [[BlockTy3]]* %block3 to %struct.__opencl_block_literal_generic* ++; CHECK-LLVM: [[Block3Ptr:%[0-9]+]] = addrspacecast %struct.__opencl_block_literal_generic* [[Block3]] to i8 addrspace(4) + ; CHECK-LLVM: [[BlockInv3:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8 addrspace(4)* +-; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3]]) +- +- %10 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %9) +- %11 = alloca [1 x i32] +- %12 = getelementptr [1 x i32], [1 x i32]* %11, i32 0, i32 0 +- %13 = load i8, i8* %c, align 1 +- %14 = zext i8 %13 to i32 +- store i32 %14, i32* %12, align 4 ++; CHECK-LLVM: call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv3]], i8 addrspace(4)* [[Block3Ptr]]) ++ ++ %22 = call i32 @__enqueue_kernel_basic_events(%opencl.queue_t* %11, i32 %12, %struct.ndrange_t* %tmp4, i32 2, %opencl.clk_event_t* addrspace(4)* %15, %opencl.clk_event_t* addrspace(4)* %16, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*)* @__device_side_enqueue_block_invoke_2_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* %21) ++ %23 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 ++ %24 = load i32, i32* %flags, align 4 ++ %25 = bitcast %struct.ndrange_t* %tmp11 to i8* ++ %26 = bitcast %struct.ndrange_t* %ndrange to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %25, i8* align 4 %26, i32 4, i1 false) ++ %arraydecay = getelementptr inbounds [1 x %opencl.clk_event_t*], [1 x %opencl.clk_event_t*]* %event_wait_list2, i32 0, i32 0 ++ %27 = addrspacecast %opencl.clk_event_t** %arraydecay to %opencl.clk_event_t* addrspace(4)* ++ %28 = addrspacecast %opencl.clk_event_t** %clk_event to %opencl.clk_event_t* addrspace(4)* ++ %29 = getelementptr [1 x i32], [1 x i32]* %block_sizes, i32 0, i32 0 ++ %30 = load i8, i8* %c, align 1 ++ %31 = zext i8 %30 to i32 ++ store i32 %31, i32* %29, align 4 + + ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf31:[0-9]+]] + ; CHECK-SPIRV: Bitcast {{[0-9]+}} [[BlockLit3Tmp:[0-9]+]] [[BlockGlb1:[0-9]+]] +@@ -182,14 +225,18 @@ entry: + ; CHECK-LLVM: [[BlockInv0:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8 addrspace(4)* + ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 2, %opencl.clk_event_t* addrspace(4)* {{.*}}, %opencl.clk_event_t* addrspace(4)* {{.*}}, i8 addrspace(4)* [[BlockInv0]], i8 addrspace(4)* [[Block0]], i32 1, i32* {{.*}}) + +- %15 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i32 2, %opencl.clk_event_t* addrspace(4)* %6, %opencl.clk_event_t* addrspace(4)* %7, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %12) +- %16 = alloca [3 x i32] +- %17 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 0 +- store i32 1, i32* %17, align 4 +- %18 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 1 +- store i32 2, i32* %18, align 4 +- %19 = getelementptr [3 x i32], [3 x i32]* %16, i32 0, i32 2 +- store i32 4, i32* %19, align 4 ++ %32 = call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* %23, i32 %24, %struct.ndrange_t* %tmp11, i32 2, %opencl.clk_event_t* addrspace(4)* %27, %opencl.clk_event_t* addrspace(4)* %28, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_3_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global to i8 addrspace(1)*) to i8 addrspace(4)*), i32 1, i32* %29) ++ %33 = load %opencl.queue_t*, %opencl.queue_t** %default_queue, align 4 ++ %34 = load i32, i32* %flags, align 4 ++ %35 = bitcast %struct.ndrange_t* %tmp12 to i8* ++ %36 = bitcast %struct.ndrange_t* %ndrange to i8* ++ call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 %35, i8* align 4 %36, i32 4, i1 false) ++ %37 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 0 ++ store i32 1, i32* %37, align 4 ++ %38 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 1 ++ store i32 2, i32* %38, align 4 ++ %39 = getelementptr [3 x i32], [3 x i32]* %block_sizes13, i32 0, i32 2 ++ store i32 4, i32* %39, align 4 + + ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf41:[0-9]+]] + ; CHECK-SPIRV: PtrAccessChain [[Int32LocPtrTy]] [[LocalBuf42:[0-9]+]] +@@ -206,24 +253,28 @@ entry: + ; CHECK-LLVM: [[BlockInv1:%[0-9]+]] = addrspacecast void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8 addrspace(4)* + ; CHECK-LLVM: call i32 @__enqueue_kernel_events_varargs(%opencl.queue_t* {{.*}}, i32 {{.*}}, %struct.ndrange_t* {{.*}}, i32 0, %opencl.clk_event_t* addrspace(4)* null, %opencl.clk_event_t* addrspace(4)* null, i8 addrspace(4)* [[BlockInv1]], i8 addrspace(4)* [[Block1]], i32 3, i32* {{.*}}) + +- %20 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %1, i32 %2, %struct.ndrange_t* %ndrange, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32 } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %17) ++ %40 = call i32 @__enqueue_kernel_varargs(%opencl.queue_t* %33, i32 %34, %struct.ndrange_t* %tmp12, i8 addrspace(4)* addrspacecast (i8* bitcast (void (i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*)* @__device_side_enqueue_block_invoke_4_kernel to i8*) to i8 addrspace(4)*), i8 addrspace(4)* addrspacecast (i8 addrspace(1)* bitcast ({ i32, i32, i8 addrspace(4)* } addrspace(1)* @__block_literal_global.1 to i8 addrspace(1)*) to i8 addrspace(4)*), i32 3, i32* %37) ++ + ret void + } + ++; Function Attrs: argmemonly nounwind ++declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture writeonly, i8* nocapture readonly, i32, i1) #1 ++ + ; Function Attrs: convergent noinline nounwind optnone + define internal spir_func void @__device_side_enqueue_block_invoke(i8 addrspace(4)* %.block_descriptor) #2 { + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 +- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* +- store <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 +- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* ++ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)** %block.addr, align 4 ++ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 5 + %0 = load i8, i8 addrspace(4)* %block.capture.addr, align 4 + %conv = sext i8 %0 to i32 +- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 2 ++ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 + %1 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr1, align 4 +- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 3 ++ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i8 }> addrspace(4)* %block, i32 0, i32 4 + %2 = load i32, i32 addrspace(4)* %block.capture.addr2, align 4 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %1, i32 %2 + store i32 %conv, i32 addrspace(1)* %arrayidx, align 4 +@@ -243,19 +294,19 @@ declare i32 @__enqueue_kernel_basic(%opencl.queue_t*, i32, %struct.ndrange_t*, i + define internal spir_func void @__device_side_enqueue_block_invoke_2(i8 addrspace(4)* %.block_descriptor) #2 { + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 +- %block.addr = alloca <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* +- store <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 +- %block.capture.addr = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* ++ store <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)** %block.addr, align 4 ++ %block.capture.addr = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 5 + %0 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr, align 4 +- %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 ++ %block.capture.addr1 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 + %1 = load i32, i32 addrspace(4)* %block.capture.addr1, align 4 + %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %0, i32 %1 + %2 = load i32, i32 addrspace(1)* %arrayidx, align 4 +- %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 2 ++ %block.capture.addr2 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 + %3 = load i32 addrspace(1)*, i32 addrspace(1)* addrspace(4)* %block.capture.addr2, align 4 +- %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 3 ++ %block.capture.addr3 = getelementptr inbounds <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }>, <{ i32, i32, i8 addrspace(4)*, i32 addrspace(1)*, i32, i32 addrspace(1)* }> addrspace(4)* %block, i32 0, i32 4 + %4 = load i32, i32 addrspace(4)* %block.capture.addr3, align 4 + %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %3, i32 %4 + store i32 %2, i32 addrspace(1)* %arrayidx4, align 4 +@@ -276,11 +327,11 @@ define internal spir_func void @__device_side_enqueue_block_invoke_3(i8 addrspac + entry: + %.block_descriptor.addr = alloca i8 addrspace(4)*, align 4 + %p.addr = alloca i8 addrspace(3)*, align 4 +- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* + store i8 addrspace(3)* %p, i8 addrspace(3)** %p.addr, align 4 +- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 + ret void + } + +@@ -300,13 +351,13 @@ entry: + %p1.addr = alloca i8 addrspace(3)*, align 4 + %p2.addr = alloca i8 addrspace(3)*, align 4 + %p3.addr = alloca i8 addrspace(3)*, align 4 +- %block.addr = alloca <{ i32, i32 }> addrspace(4)*, align 4 ++ %block.addr = alloca <{ i32, i32, i8 addrspace(4)* }> addrspace(4)*, align 4 + store i8 addrspace(4)* %.block_descriptor, i8 addrspace(4)** %.block_descriptor.addr, align 4 +- %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32 }> addrspace(4)* ++ %block = bitcast i8 addrspace(4)* %.block_descriptor to <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* + store i8 addrspace(3)* %p1, i8 addrspace(3)** %p1.addr, align 4 + store i8 addrspace(3)* %p2, i8 addrspace(3)** %p2.addr, align 4 + store i8 addrspace(3)* %p3, i8 addrspace(3)** %p3.addr, align 4 +- store <{ i32, i32 }> addrspace(4)* %block, <{ i32, i32 }> addrspace(4)** %block.addr, align 4 ++ store <{ i32, i32, i8 addrspace(4)* }> addrspace(4)* %block, <{ i32, i32, i8 addrspace(4)* }> addrspace(4)** %block.addr, align 4 + ret void + } + +@@ -329,27 +380,20 @@ declare i32 @__enqueue_kernel_varargs(%opencl.queue_t*, i32, %struct.ndrange_t*, + ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_3_kernel(i8 addrspace(4)*, i8 addrspace(3)*) + ; CHECK-LLVM-DAG: define spir_kernel void @__device_side_enqueue_block_invoke_4_kernel(i8 addrspace(4)*, i8 addrspace(3)*, i8 addrspace(3)*, i8 addrspace(3)*) + +-attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #1 = { argmemonly nounwind } +-attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #2 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + attributes #3 = { nounwind } + + !llvm.module.flags = !{!0} +-!opencl.enable.FP_CONTRACT = !{} + !opencl.ocl.version = !{!1} + !opencl.spir.version = !{!1} +-!opencl.used.extensions = !{!2} +-!opencl.used.optional.core.features = !{!2} +-!opencl.compiler.options = !{!2} +-!llvm.ident = !{!3} ++!llvm.ident = !{!2} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 2, i32 0} +-!2 = !{} +-!3 = !{!"clang version 7.0.0"} +-!4 = !{i32 1, i32 1, i32 0, i32 0} +-!5 = !{!"none", !"none", !"none", !"none"} +-!6 = !{!"int*", !"int*", !"int", !"char"} +-!7 = !{!"", !"", !"", !""} +-!8 = !{i1 false, i1 false, i1 false, i1 false} +-!9 = !{i32 0, i32 0, i32 0, i32 0} ++!2 = !{!"clang version 9.0.0 (https://llvm.org/git/clang 04fb8964a801a5c5d7baa5a22272243a7d183896) (https://llvm.org/git/llvm 384f64397f6ad95a361b72d62c07d7bac9f24163)"} ++!3 = !{i32 1, i32 1, i32 0, i32 0} ++!4 = !{!"none", !"none", !"none", !"none"} ++!5 = !{!"int*", !"int*", !"int", !"char"} ++!6 = !{!"", !"", !"", !""} +-- +2.17.1 + diff --git a/patches/spirv/0007-Backport-Implementation-of-SPV_INTEL_function_pointe.patch b/patches/spirv/0007-Backport-Implementation-of-SPV_INTEL_function_pointe.patch new file mode 100644 index 00000000..87c10f6b --- /dev/null +++ b/patches/spirv/0007-Backport-Implementation-of-SPV_INTEL_function_pointe.patch @@ -0,0 +1,1079 @@ +From 245c9891ddc5345e651c7ce1d591e4d721abcd63 Mon Sep 17 00:00:00 2001 +From: Aleksandr Bezzubikov +Date: Fri, 15 May 2020 23:32:08 +0300 +Subject: [PATCH 07/10] [Backport] Implementation of + SPV_INTEL_function_pointers extension Original commit: + https://github.com/KhronosGroup/SPIRV-LLVM-Translator/commit/934d50e + +--- + lib/SPIRV/OCLUtil.h | 7 + + lib/SPIRV/SPIRVReader.cpp | 39 +++- + lib/SPIRV/SPIRVRegularizeLLVM.cpp | 4 +- + lib/SPIRV/SPIRVWriter.cpp | 48 ++++- + lib/SPIRV/SPIRVWriter.h | 2 + + lib/SPIRV/libSPIRV/SPIRVEnum.h | 2 + + lib/SPIRV/libSPIRV/SPIRVErrorEnum.h | 1 + + lib/SPIRV/libSPIRV/SPIRVInstruction.cpp | 25 +++ + lib/SPIRV/libSPIRV/SPIRVInstruction.h | 41 ++++ + lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h | 1 + + lib/SPIRV/libSPIRV/SPIRVModule.cpp | 20 ++ + lib/SPIRV/libSPIRV/SPIRVModule.h | 6 + + lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 3 + + lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h | 2 + + lib/SPIRV/libSPIRV/spirv.hpp | 5 + + .../fp-from-host.ll | 69 +++++++ + .../function-pointer-as-function-arg.ll | 176 ++++++++++++++++++ + .../function-pointer.ll | 91 +++++++++ + .../non-uniform-function-pointer.ll | 138 ++++++++++++++ + .../referenced-indirectly.ll | 81 ++++++++ + 20 files changed, 753 insertions(+), 8 deletions(-) + create mode 100644 test/transcoding/SPV_INTEL_function_pointers/fp-from-host.ll + create mode 100644 test/transcoding/SPV_INTEL_function_pointers/function-pointer-as-function-arg.ll + create mode 100644 test/transcoding/SPV_INTEL_function_pointers/function-pointer.ll + create mode 100644 test/transcoding/SPV_INTEL_function_pointers/non-uniform-function-pointer.ll + create mode 100644 test/transcoding/SPV_INTEL_function_pointers/referenced-indirectly.ll + +diff --git a/lib/SPIRV/OCLUtil.h b/lib/SPIRV/OCLUtil.h +index 37d21d9..cc0fba5 100644 +--- a/lib/SPIRV/OCLUtil.h ++++ b/lib/SPIRV/OCLUtil.h +@@ -416,6 +416,13 @@ bool isSamplerTy(Type *Ty); + // If so, it applies ContractionOff ExecutionMode to the kernel. + void checkFpContract(BinaryOperator *B, SPIRVBasicBlock *BB); + ++template std::string toString(const T *Object) { ++ std::string S; ++ llvm::raw_string_ostream RSOS(S); ++ Object->print(RSOS); ++ RSOS.flush(); ++ return S; ++} + } // namespace OCLUtil + + /////////////////////////////////////////////////////////////////////////////// +diff --git a/lib/SPIRV/SPIRVReader.cpp b/lib/SPIRV/SPIRVReader.cpp +index 2a8bded..aa3013d 100644 +--- a/lib/SPIRV/SPIRVReader.cpp ++++ b/lib/SPIRV/SPIRVReader.cpp +@@ -508,8 +508,19 @@ std::string SPIRVToLLVM::transTypeToOCLTypeName(SPIRVType *T, bool IsSigned) { + break; + case OpTypeArray: + return "array"; +- case OpTypePointer: +- return transTypeToOCLTypeName(T->getPointerElementType()) + "*"; ++ case OpTypePointer: { ++ SPIRVType *ET = T->getPointerElementType(); ++ if (isa(ET)) { ++ SPIRVTypeFunction *TF = static_cast(ET); ++ std::string name = transTypeToOCLTypeName(TF->getReturnType()); ++ name += " (*)("; ++ for (unsigned I = 0, E = TF->getNumParameters(); I < E; ++I) ++ name += transTypeToOCLTypeName(TF->getParameterType(I)) + ','; ++ name.back() = ')'; // replace the last comma with a closing brace. ++ return name; ++ } ++ return transTypeToOCLTypeName(ET) + "*"; ++ } + case OpTypeVector: + return transTypeToOCLTypeName(T->getVectorComponentType()) + + T->getVectorComponentCount(); +@@ -1574,6 +1585,26 @@ Value *SPIRVToLLVM::transValueWithoutDecoration(SPIRVValue *BV, Function *F, + case OpAsmCallINTEL: + return mapValue( + BV, transAsmCallINTEL(static_cast(BV), F, BB)); ++ case OpFunctionPointerCallINTEL: { ++ SPIRVFunctionPointerCallINTEL *BC = ++ static_cast(BV); ++ auto Call = CallInst::Create(transValue(BC->getCalledValue(), F, BB), ++ transValue(BC->getArgumentValues(), F, BB), ++ BC->getName(), BB); ++ // Assuming we are calling a regular device function ++ Call->setCallingConv(CallingConv::SPIR_FUNC); ++ // Don't set attributes, because at translation time we don't know which ++ // function exactly we are calling. ++ return mapValue(BV, Call); ++ } ++ ++ case OpFunctionPointerINTEL: { ++ SPIRVFunctionPointerINTEL *BC = ++ static_cast(BV); ++ SPIRVFunction *F = BC->getFunction(); ++ BV->setName(F->getName()); ++ return mapValue(BV, transFunction(F)); ++ } + + case OpExtInst: { + auto *ExtInst = static_cast(BV); +@@ -1715,6 +1746,10 @@ Function *SPIRVToLLVM::transFunction(SPIRVFunction *BF) { + Function *F = cast( + mapValue(BF, Function::Create(FT, Linkage, BF->getName(), M))); + mapFunction(BF, F); ++ ++ if (BF->hasDecorate(DecorationReferencedIndirectlyINTEL)) ++ F->addFnAttr("referenced-indirectly"); ++ + if (!F->isIntrinsic()) { + F->setCallingConv(IsKernel ? CallingConv::SPIR_KERNEL + : CallingConv::SPIR_FUNC); +diff --git a/lib/SPIRV/SPIRVRegularizeLLVM.cpp b/lib/SPIRV/SPIRVRegularizeLLVM.cpp +index 4a1837d..38a4089 100644 +--- a/lib/SPIRV/SPIRVRegularizeLLVM.cpp ++++ b/lib/SPIRV/SPIRVRegularizeLLVM.cpp +@@ -197,8 +197,8 @@ void SPIRVRegularizeLLVM::lowerFuncPtr(Module *M) { + auto AI = F.arg_begin(); + if (hasFunctionPointerArg(&F, AI)) { + auto OC = getSPIRVFuncOC(F.getName()); +- assert(OC != OpNop && "Invalid function pointer usage"); +- Work.push_back(std::make_pair(&F, OC)); ++ if (OC != OpNop) // builtin with a function pointer argument ++ Work.push_back(std::make_pair(&F, OC)); + } + } + for (auto &I : Work) +diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp +index 9afc391..df6f070 100644 +--- a/lib/SPIRV/SPIRVWriter.cpp ++++ b/lib/SPIRV/SPIRVWriter.cpp +@@ -59,6 +59,7 @@ + #include "llvm/ADT/StringSwitch.h" + #include "llvm/ADT/Triple.h" + #include "llvm/Bitcode/BitcodeWriter.h" ++#include "llvm/IR/CallSite.h" + #include "llvm/IR/Constants.h" + #include "llvm/IR/DebugInfo.h" + #include "llvm/IR/DerivedTypes.h" +@@ -285,7 +286,6 @@ SPIRVType *LLVMToSPIRV::transType(Type *T) { + // sampler or pipe type. + if (T->isPointerTy()) { + auto ET = T->getPointerElementType(); +- assert(!ET->isFunctionTy() && "Function pointer type is not allowed"); + auto ST = dyn_cast(ET); + auto AddrSpc = T->getPointerAddressSpace(); + if (ST && !ST->isSized()) { +@@ -520,6 +520,11 @@ SPIRVFunction *LLVMToSPIRV::transFunctionDecl(Function *F) { + BF->addDecorate(DecorationFuncParamAttr, FunctionParameterAttributeZext); + if (Attrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt)) + BF->addDecorate(DecorationFuncParamAttr, FunctionParameterAttributeSext); ++ if (Attrs.hasFnAttribute("referenced-indirectly")) { ++ assert(!oclIsKernel(F) && ++ "kernel function was marked as referenced-indirectly"); ++ BF->addDecorate(DecorationReferencedIndirectlyINTEL); ++ } + SPIRVDBG(dbgs() << "[transFunction] " << *F << " => "; + spvdbgs() << *BF << '\n';) + return BF; +@@ -783,9 +788,19 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V, + MemoryAccess[0] |= MemoryAccessNontemporalMask; + if (MemoryAccess.front() == 0) + MemoryAccess.clear(); ++ ++ SPIRVValue *BSV = nullptr; ++ if (Function *F = dyn_cast(ST->getValueOperand())) { ++ // store of function pointer ++ BSV = BM->addFunctionPointerINTELInst( ++ transType(F->getType()), ++ static_cast(transValue(F, BB)), BB); ++ } else { ++ BSV = transValue(ST->getValueOperand(), BB); ++ } ++ + return mapValue(V, BM->addStoreInst(transValue(ST->getPointerOperand(), BB), +- transValue(ST->getValueOperand(), BB), +- MemoryAccess, BB)); ++ BSV, MemoryAccess, BB)); + } + + if (LoadInst *LD = dyn_cast(V)) { +@@ -909,8 +924,17 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V, + + if (auto Phi = dyn_cast(V)) { + std::vector IncomingPairs; ++ + for (size_t I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) { +- IncomingPairs.push_back(transValue(Phi->getIncomingValue(I), BB)); ++ SPIRVValue *BV = nullptr; ++ if (Function *F = dyn_cast(Phi->getIncomingValue(I))) { ++ BV = BM->addFunctionPointerINTELInst( ++ transType(F->getType()), ++ static_cast(transValue(F, BB)), BB); ++ } else { ++ BV = transValue(Phi->getIncomingValue(I), BB); ++ } ++ IncomingPairs.push_back(BV); + IncomingPairs.push_back(transValue(Phi->getIncomingBlock(I), nullptr)); + } + return mapValue( +@@ -1201,6 +1225,14 @@ SPIRVValue *LLVMToSPIRV::transCallInst(CallInst *CI, SPIRVBasicBlock *BB) { + if (isa(CI->getCalledValue())) + return transAsmCallINTEL(CI, BB); + ++ llvm::CallSite CS(CI); ++ if (CS.isIndirectCall()) ++ return transIndirectCallInst(CI, BB); ++ return transDirectCallInst(CI, BB); ++} ++ ++SPIRVValue *LLVMToSPIRV::transDirectCallInst(CallInst *CI, ++ SPIRVBasicBlock *BB) { + SPIRVExtInstSetKind ExtSetKind = SPIRVEIS_Count; + SPIRVWord ExtOp = SPIRVWORD_MAX; + llvm::Function *F = CI->getCalledFunction(); +@@ -1258,6 +1290,14 @@ SPIRVValue *LLVMToSPIRV::transAsmCallINTEL(CallInst *CI, SPIRVBasicBlock *BB) { + BB); + } + ++SPIRVValue *LLVMToSPIRV::transIndirectCallInst(CallInst *CI, ++ SPIRVBasicBlock *BB) { ++ return BM->addIndirectCallInst( ++ transValue(CI->getCalledValue(), BB), transType(CI->getType()), ++ transArguments(CI, BB, SPIRVEntry::createUnique(OpFunctionCall).get()), ++ BB); ++} ++ + bool LLVMToSPIRV::transAddressingMode() { + Triple TargetTriple(M->getTargetTriple()); + Triple::ArchType Arch = TargetTriple.getArch(); +diff --git a/lib/SPIRV/SPIRVWriter.h b/lib/SPIRV/SPIRVWriter.h +index 26b1a4a..904fd48 100644 +--- a/lib/SPIRV/SPIRVWriter.h ++++ b/lib/SPIRV/SPIRVWriter.h +@@ -101,6 +101,8 @@ public: + SPIRVValue *transCallInst(CallInst *Call, SPIRVBasicBlock *BB); + SPIRVValue *transAsmINTEL(InlineAsm *Asm); + SPIRVValue *transAsmCallINTEL(CallInst *Call, SPIRVBasicBlock *BB); ++ SPIRVValue *transDirectCallInst(CallInst *Call, SPIRVBasicBlock *BB); ++ SPIRVValue *transIndirectCallInst(CallInst *Call, SPIRVBasicBlock *BB); + bool transDecoration(Value *V, SPIRVValue *BV); + SPIRVWord transFunctionControlMask(CallInst *); + SPIRVWord transFunctionControlMask(Function *); +diff --git a/lib/SPIRV/libSPIRV/SPIRVEnum.h b/lib/SPIRV/libSPIRV/SPIRVEnum.h +index 0b4a295..f4efccb 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVEnum.h ++++ b/lib/SPIRV/libSPIRV/SPIRVEnum.h +@@ -339,6 +339,8 @@ template <> inline void SPIRVMap::init() { + ADD_VEC_INIT(DecorationInputAttachmentIndex, {CapabilityInputAttachment}); + ADD_VEC_INIT(DecorationAlignment, {CapabilityKernel}); + ADD_VEC_INIT(DecorationSideEffectsINTEL, {CapabilityAsmINTEL}); ++ ADD_VEC_INIT(DecorationReferencedIndirectlyINTEL, ++ {CapabilityIndirectReferencesINTEL}); + } + + template <> inline void SPIRVMap::init() { +diff --git a/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h b/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h +index 6df2fb2..d45b88a 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h ++++ b/lib/SPIRV/libSPIRV/SPIRVErrorEnum.h +@@ -10,5 +10,6 @@ _SPIRV_OP(InvalidMemoryModel, "Expects 0-3.") + _SPIRV_OP(InvalidFunctionControlMask, "") + _SPIRV_OP(InvalidBuiltinSetName, "Expects OpenCL.std.") + _SPIRV_OP(InvalidFunctionCall, "Unexpected llvm intrinsic:") ++_SPIRV_OP(FunctionPointers, "Can't translate function pointer:\n") + + #endif +diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.cpp b/lib/SPIRV/libSPIRV/SPIRVInstruction.cpp +index 2172bc6..ff97b3b 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVInstruction.cpp ++++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.cpp +@@ -119,6 +119,31 @@ void SPIRVFunctionCall::validate() const { + SPIRVFunctionCallGeneric::validate(); + } + ++SPIRVFunctionPointerCallINTEL::SPIRVFunctionPointerCallINTEL( ++ SPIRVId TheId, SPIRVValue *TheCalledValue, SPIRVType *TheReturnType, ++ const std::vector &TheArgs, SPIRVBasicBlock *BB) ++ : SPIRVFunctionCallGeneric(TheReturnType, TheId, TheArgs, BB), ++ CalledValueId(TheCalledValue->getId()) { ++ validate(); ++} ++ ++void SPIRVFunctionPointerCallINTEL::validate() const { ++ SPIRVFunctionCallGeneric::validate(); ++} ++ ++SPIRVFunctionPointerINTEL::SPIRVFunctionPointerINTEL(SPIRVId TheId, ++ SPIRVType *TheType, ++ SPIRVFunction *TheFunction, ++ SPIRVBasicBlock *BB) ++ : SPIRVInstruction(FixedWordCount, OC, TheType, TheId, BB), ++ TheFunction(TheFunction->getId()) { ++ validate(); ++} ++ ++void SPIRVFunctionPointerINTEL::validate() const { ++ SPIRVInstruction::validate(); ++} ++ + // ToDo: Each instruction should implement this function + std::vector SPIRVInstruction::getOperands() { + std::vector Empty; +diff --git a/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/lib/SPIRV/libSPIRV/SPIRVInstruction.h +index 508c54c..55e38d8 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVInstruction.h ++++ b/lib/SPIRV/libSPIRV/SPIRVInstruction.h +@@ -1375,6 +1375,47 @@ protected: + SPIRVId FunctionId; + }; + ++class SPIRVFunctionPointerCallINTEL ++ : public SPIRVFunctionCallGeneric { ++public: ++ SPIRVFunctionPointerCallINTEL(SPIRVId TheId, SPIRVValue *TheCalledValue, ++ SPIRVType *TheReturnType, ++ const std::vector &TheArgs, ++ SPIRVBasicBlock *BB); ++ SPIRVFunctionPointerCallINTEL() : CalledValueId(SPIRVID_INVALID) {} ++ SPIRVValue *getCalledValue() const { return get(CalledValueId); } ++ _SPIRV_DEF_ENCDEC4(Type, Id, CalledValueId, Args) ++ void validate() const override; ++ bool isOperandLiteral(unsigned Index) const override { return false; } ++ SPIRVCapVec getRequiredCapability() const override { ++ return getVec(CapabilityFunctionPointersINTEL); ++ } ++ ++protected: ++ SPIRVId CalledValueId; ++}; ++ ++class SPIRVFunctionPointerINTEL : public SPIRVInstruction { ++ const static Op OC = OpFunctionPointerINTEL; ++ const static SPIRVWord FixedWordCount = 4; ++ ++public: ++ SPIRVFunctionPointerINTEL(SPIRVId TheId, SPIRVType *TheType, ++ SPIRVFunction *TheFunction, SPIRVBasicBlock *BB); ++ SPIRVFunctionPointerINTEL() ++ : SPIRVInstruction(OC), TheFunction(SPIRVID_INVALID) {} ++ SPIRVFunction *getFunction() const { return get(TheFunction); } ++ _SPIRV_DEF_ENCDEC3(Type, Id, TheFunction) ++ void validate() const override; ++ bool isOperandLiteral(unsigned Index) const override { return false; } ++ SPIRVCapVec getRequiredCapability() const override { ++ return getVec(CapabilityFunctionPointersINTEL); ++ } ++ ++protected: ++ SPIRVId TheFunction; ++}; ++ + class SPIRVExtInst : public SPIRVFunctionCallGeneric { + public: + SPIRVExtInst(SPIRVType *TheType, SPIRVId TheId, SPIRVId TheBuiltinSet, +diff --git a/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h b/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h +index 0423951..97d2239 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h ++++ b/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h +@@ -397,6 +397,7 @@ inline bool isValid(spv::Decoration V) { + case DecorationInputAttachmentIndex: + case DecorationAlignment: + case DecorationMaxByteOffset: ++ case DecorationReferencedIndirectlyINTEL: + return true; + default: + return false; +diff --git a/lib/SPIRV/libSPIRV/SPIRVModule.cpp b/lib/SPIRV/libSPIRV/SPIRVModule.cpp +index 7308992..b2c64a3 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVModule.cpp ++++ b/lib/SPIRV/libSPIRV/SPIRVModule.cpp +@@ -281,6 +281,11 @@ public: + SPIRVInstruction *addAsmCallINTELInst(SPIRVAsmINTEL *, + const std::vector &, + SPIRVBasicBlock *) override; ++ SPIRVInstruction *addIndirectCallInst(SPIRVValue *, SPIRVType *, ++ const std::vector &, ++ SPIRVBasicBlock *) override; ++ SPIRVInstruction *addFunctionPointerINTELInst(SPIRVType *, SPIRVFunction *, ++ SPIRVBasicBlock *) override; + SPIRVInstruction *addCmpInst(Op, SPIRVType *, SPIRVValue *, SPIRVValue *, + SPIRVBasicBlock *) override; + SPIRVInstruction *addLoadInst(SPIRVValue *, const std::vector &, +@@ -1134,6 +1139,21 @@ SPIRVModuleImpl::addAsmCallINTELInst(SPIRVAsmINTEL *TheAsm, + new SPIRVAsmCallINTEL(getId(), TheAsm, TheArguments, BB), BB); + } + ++SPIRVInstruction *SPIRVModuleImpl::addIndirectCallInst( ++ SPIRVValue *TheCalledValue, SPIRVType *TheReturnType, ++ const std::vector &TheArguments, SPIRVBasicBlock *BB) { ++ return addInstruction( ++ new SPIRVFunctionPointerCallINTEL(getId(), TheCalledValue, TheReturnType, ++ TheArguments, BB), ++ BB); ++} ++ ++SPIRVInstruction *SPIRVModuleImpl::addFunctionPointerINTELInst( ++ SPIRVType *TheType, SPIRVFunction *TheFunction, SPIRVBasicBlock *BB) { ++ return addInstruction( ++ new SPIRVFunctionPointerINTEL(getId(), TheType, TheFunction, BB), BB); ++} ++ + SPIRVInstruction *SPIRVModuleImpl::addBinaryInst(Op TheOpCode, SPIRVType *Type, + SPIRVValue *Op1, + SPIRVValue *Op2, +diff --git a/lib/SPIRV/libSPIRV/SPIRVModule.h b/lib/SPIRV/libSPIRV/SPIRVModule.h +index 72d3dd1..9043346 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVModule.h ++++ b/lib/SPIRV/libSPIRV/SPIRVModule.h +@@ -293,6 +293,12 @@ public: + virtual SPIRVInstruction *addAsmCallINTELInst(SPIRVAsmINTEL *, + const std::vector &, + SPIRVBasicBlock *) = 0; ++ virtual SPIRVInstruction *addIndirectCallInst(SPIRVValue *, SPIRVType *, ++ const std::vector &, ++ SPIRVBasicBlock *) = 0; ++ virtual SPIRVInstruction *addFunctionPointerINTELInst(SPIRVType *, ++ SPIRVFunction *, ++ SPIRVBasicBlock *) = 0; + virtual SPIRVInstruction * + addCompositeConstructInst(SPIRVType *, const std::vector &, + SPIRVBasicBlock *) = 0; +diff --git a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +index 13c3813..4c03b8f 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h ++++ b/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +@@ -330,6 +330,7 @@ template <> inline void SPIRVMap::init() { + add(DecorationAlignment, "Alignment"); + add(DecorationMaxByteOffset, "MaxByteOffset"); + add(DecorationSideEffectsINTEL, "SideEffectsINTEL"); ++ add(DecorationReferencedIndirectlyINTEL, "ReferencedIndirectlyINTEL"); + } + SPIRV_DEF_NAMEMAP(Decoration, SPIRVDecorationNameMap) + +@@ -476,6 +477,8 @@ template <> inline void SPIRVMap::init() { + "SubgroupAvcMotionEstimationIntraINTEL"); + add(CapabilitySubgroupAvcMotionEstimationChromaINTEL, + "SubgroupAvcMotionEstimationChromaINTEL"); ++ add(CapabilityFunctionPointersINTEL, "FunctionPointersINTEL"); ++ add(CapabilityIndirectReferencesINTEL, "IndirectReferencesINTEL"); + } + SPIRV_DEF_NAMEMAP(Capability, SPIRVCapabilityNameMap) + +diff --git a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h +index 65f5e71..d822b80 100644 +--- a/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h ++++ b/lib/SPIRV/libSPIRV/SPIRVOpCodeEnum.h +@@ -305,6 +305,8 @@ _SPIRV_OP(SubgroupImageBlockReadINTEL, 5577) + _SPIRV_OP(SubgroupImageBlockWriteINTEL, 5578) + _SPIRV_OP(SubgroupImageMediaBlockReadINTEL, 5580) + _SPIRV_OP(SubgroupImageMediaBlockWriteINTEL, 5581) ++_SPIRV_OP(FunctionPointerINTEL, 5600) ++_SPIRV_OP(FunctionPointerCallINTEL, 5601) + _SPIRV_OP(AsmTargetINTEL, 5609) + _SPIRV_OP(AsmINTEL, 5610) + _SPIRV_OP(AsmCallINTEL, 5611) +diff --git a/lib/SPIRV/libSPIRV/spirv.hpp b/lib/SPIRV/libSPIRV/spirv.hpp +index a45eb9d..028e6ea 100644 +--- a/lib/SPIRV/libSPIRV/spirv.hpp ++++ b/lib/SPIRV/libSPIRV/spirv.hpp +@@ -386,6 +386,7 @@ enum Decoration { + DecorationPassthroughNV = 5250, + DecorationViewportRelativeNV = 5252, + DecorationSecondaryViewportRelativeNV = 5256, ++ DecorationReferencedIndirectlyINTEL = 5602, + DecorationSideEffectsINTEL = 5608, + DecorationMax = 0x7fffffff, + }; +@@ -653,6 +654,8 @@ enum Capability { + CapabilitySubgroupImageBlockIOINTEL = 5570, + CapabilitySubgroupImageMediaBlockIOINTEL = 5579, + CapabilityAsmINTEL = 5606, ++ CapabilityFunctionPointersINTEL = 5603, ++ CapabilityIndirectReferencesINTEL = 5604, + CapabilitySubgroupAvcMotionEstimationINTEL = 5696, + CapabilitySubgroupAvcMotionEstimationIntraINTEL = 5697, + CapabilitySubgroupAvcMotionEstimationChromaINTEL = 5698, +@@ -984,6 +987,8 @@ enum Op { + OpAsmTargetINTEL = 5609, + OpAsmINTEL = 5610, + OpAsmCallINTEL = 5611, ++ OpFunctionPointerINTEL = 5600, ++ OpFunctionPointerCallINTEL = 5601, + OpVmeImageINTEL = 5699, + OpTypeVmeImageINTEL = 5700, + OpTypeAvcImePayloadINTEL = 5701, +diff --git a/test/transcoding/SPV_INTEL_function_pointers/fp-from-host.ll b/test/transcoding/SPV_INTEL_function_pointers/fp-from-host.ll +new file mode 100644 +index 0000000..5c08060 +--- /dev/null ++++ b/test/transcoding/SPV_INTEL_function_pointers/fp-from-host.ll +@@ -0,0 +1,69 @@ ++; RUN: llvm-as %s -o %t.bc ++; RUN: llvm-spirv %t.bc -spirv-text -o %t.spt ++; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ++; RUN: llvm-spirv %t.bc -o %t.spv ++; RUN: llvm-spirv -r %t.spv -o %t.r.bc ++; RUN: llvm-dis %t.r.bc -o %t.r.ll ++; RUN: FileCheck < %t.r.ll %s --check-prefix=CHECK-LLVM ++; ++; Generated from: ++; typedef int (*fp_t)(int); ++; ++; __kernel void test(__global int *fp, __global int *data) { ++; ++; data[0] = ((fp_t)(*fp))(data[1]); ++; } ++; ++; CHECK-SPIRV: Capability FunctionPointersINTEL ++; ++; CHECK-SPIRV: EntryPoint {{[0-9]+}} [[KERNEL_ID:[0-9]+]] "test" ++; CHECK-SPIRV: TypeInt [[INT32_TYPE_ID:[0-9]+]] 32 ++; CHECK-SPIRV: TypePointer [[INT_PTR:[0-9]+]] 5 [[INT32_TYPE_ID]] ++; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[INT32_TYPE_ID]] [[INT32_TYPE_ID]] ++; CHECK-SPIRV: TypePointer [[FOO_TYPE_PTR_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] ++; ++; CHECK-SPIRV: Function {{[0-9]+}} [[KERNEL_ID]] ++; CHECK-SPIRV: FunctionParameter [[INT_PTR]] [[FP:[0-9]+]] ++; CHECK-SPIRV: Load [[INT32_TYPE_ID]] [[FUNC_ADDR:[0-9]+]] [[FP]] ++; CHECK-SPIRV: ConvertUToPtr [[FOO_TYPE_PTR_ID]] [[FOO_PTR:[0-9]+]] [[FUNC_ADDR]] ++; CHECK-SPIRV: FunctionPointerCallINTEL [[INT32_TYPE_ID]] {{[0-9]+}} [[FOO_PTR]] ++; ++; CHECK-LLVM: define spir_kernel void @test(i32 addrspace(1)* ++; CHECK-LLVM: %{{.*}} = call spir_func i32 %{{.*}}(i32 ++ ++target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir64-unknown-unknown" ++ ++; Function Attrs: convergent nounwind ++define spir_kernel void @test(i32 addrspace(1)* %fp, i32 addrspace(1)* %data) #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { ++entry: ++ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %data, i64 1 ++ %0 = load i32, i32 addrspace(1)* %arrayidx, align 4, !tbaa !8 ++ %1 = load i32, i32 addrspace(1)* %fp, align 4, !tbaa !8 ++ %2 = inttoptr i32 %1 to i32 (i32)* ++ %call = call spir_func i32 %2(i32 %0) #1 ++ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %data, i64 0 ++ store i32 %call, i32 addrspace(1)* %arrayidx1, align 4, !tbaa !8 ++ ret void ++} ++ ++attributes #0 = { convergent nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { convergent } ++ ++!llvm.module.flags = !{!0} ++!opencl.ocl.version = !{!1} ++!opencl.spir.version = !{!2} ++!llvm.ident = !{!3} ++ ++!0 = !{i32 1, !"wchar_size", i32 4} ++!1 = !{i32 1, i32 0} ++!2 = !{i32 1, i32 2} ++!3 = !{!"clang version 7.1.0 "} ++!4 = !{i32 1, i32 1} ++!5 = !{!"none", !"none"} ++!6 = !{!"int*", !"int*"} ++!7 = !{!"", !""} ++!8 = !{!9, !9, i64 0} ++!9 = !{!"int", !10, i64 0} ++!10 = !{!"omnipotent char", !11, i64 0} ++!11 = !{!"Simple C/C++ TBAA"} +diff --git a/test/transcoding/SPV_INTEL_function_pointers/function-pointer-as-function-arg.ll b/test/transcoding/SPV_INTEL_function_pointers/function-pointer-as-function-arg.ll +new file mode 100644 +index 0000000..d1c3f1e +--- /dev/null ++++ b/test/transcoding/SPV_INTEL_function_pointers/function-pointer-as-function-arg.ll +@@ -0,0 +1,176 @@ ++; RUN: llvm-as %s -o %t.bc ++; RUN: llvm-spirv %t.bc -spirv-text -o %t.spt ++; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ++; RUN: llvm-spirv %t.bc -o %t.spv ++; RUN: llvm-spirv -r %t.spv -o %t.r.bc ++; RUN: llvm-dis %t.r.bc -o %t.r.ll ++; RUN: FileCheck < %t.r.ll %s --check-prefix=CHECK-LLVM ++; ++; Generated from: ++; int helper(int (*f)(int), int arg) { ++; return f(arg); ++; } ++; ++; int foo(int v) { ++; return v + 1; ++; } ++; ++; int bar(int v) { ++; return v + 2; ++; } ++; ++; __kernel void test(__global int *data, int control) { ++; int (*fp)(int) = 0; ++; ++; if (get_global_id(0) % control == 0) ++; fp = &foo; ++; else ++; fp = &bar; ++; ++; data[get_global_id(0)] = helper(fp, data[get_global_id(0)]); ++; } ++; ++; CHECK-SPIRV: Capability FunctionPointersINTEL ++; ++; CHECK-SPIRV: EntryPoint 6 [[KERNEL_ID:[0-9]+]] "test" ++; CHECK-SPIRV: TypeInt [[TYPE_INT32_ID:[0-9]+]] 32 ++; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[TYPE_INT32_ID]] [[TYPE_INT32_ID]] ++; CHECK-SPIRV: TypePointer [[FOO_PTR_TYPE_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] ++; CHECK-SPIRV: TypeFunction [[HELPER_TYPE_ID:[0-9]+]] [[TYPE_INT32_ID]] [[FOO_PTR_TYPE_ID]] [[TYPE_INT32_ID]] ++; CHECK-SPIRV: TypePointer [[FOO_PTR_ALLOCA_TYPE_ID:[0-9]+]] {{[0-9]+}} [[FOO_PTR_TYPE_ID]] ++; CHECK-SPIRV: TypePointer [[TYPE_INT32_ALLOCA_ID:[0-9]+]] {{[0-9]+}} [[TYPE_INT32_ID]] ++; ++; CHECK-SPIRV: Function {{[0-9]+}} [[HELPER_ID:[0-9]+]] {{[0-9]+}} [[HELPER_TYPE_ID]] ++; CHECK-SPIRV: FunctionParameter [[FOO_PTR_TYPE_ID]] [[T_PTR_ARG_ID:[0-9]+]] ++; CHECK-SPIRV: FunctionParameter [[TYPE_INT32_ID:[0-9]+]] [[INT_ARG_ID:[0-9]+]] ++; CHECK-SPIRV: Variable [[FOO_PTR_ALLOCA_TYPE_ID]] [[T_PTR_ALLOCA_ID:[0-9]+]] ++; CHECK-SPIRV: Variable [[TYPE_INT32_ALLOCA_ID]] [[INT_ALLOCA_ID:[0-9]+]] ++; CHECK-SPIRV: Store [[T_PTR_ALLOCA_ID]] [[T_PTR_ARG_ID]] ++; CHECK-SPIRV: Store [[INT_ALLOCA_ID]] [[INT_ARG_ID]] ++; CHECK-SPIRV: Load [[FOO_PTR_TYPE_ID]] [[LOADED_T_PTR:[0-9]+]] [[T_PTR_ALLOCA_ID]] ++; CHECK-SPIRV: Load [[TYPE_INT32_ID]] [[LOADED_INT:[0-9]+]] [[INT_ALLOCA_ID]] ++; CHECK-SPIRV: FunctionPointerCallINTEL [[TYPE_INT32_ID]] [[RESULT:[0-9]+]] [[LOADED_T_PTR]] [[LOADED_INT]] ++; CHECK-SPIRV: ReturnValue [[RESULT]] ++; ++; CHECK-SPIRV: Function {{[0-9]+}} [[FOO_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] ++; CHECK-SPIRV: Function {{[0-9]+}} [[BAR_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] ++; ++; CHECK-SPIRV: Function {{[0-9]+}} [[KERNEL_ID]] ++; CHECK-SPIRV: Variable [[FOO_PTR_ALLOCA_TYPE_ID]] [[F_PTR_ALLOCA_ID:[0-9]+]] ++; CHECK-SPIRV: FunctionPointerINTEL [[FOO_PTR_TYPE_ID]] [[FOO_PTR_ID:[0-9]+]] [[FOO_ID]] ++; CHECK-SPIRV: Store [[F_PTR_ALLOCA_ID]] [[FOO_PTR_ID]] ++; CHECK-SPIRV: FunctionPointerINTEL [[FOO_PTR_TYPE_ID]] [[BAR_PTR_ID:[0-9]+]] [[BAR_ID]] ++; CHECK-SPIRV: Store [[F_PTR_ALLOCA_ID]] [[BAR_PTR_ID]] ++; CHECK-SPIRV: Load [[FOO_PTR_TYPE_ID]] [[LOADED_F_PTR:[0-9]+]] [[F_PTR_ALLOCA_ID]] ++; CHECK-SPIRV: FunctionCall {{[0-9]+}} {{[0-9]+}} [[HELPER_ID]] [[LOADED_F_PTR]] ++; ++; CHECK-LLVM: define spir_func i32 @helper(i32 (i32)* %[[F:.*]], ++; CHECK-LLVM: %[[F_ADDR:.*]] = alloca i32 (i32)* ++; CHECK-LLVM: store i32 (i32)* %[[F]], i32 (i32)** %[[F_ADDR]] ++; CHECK-LLVM: %[[F_LOADED:.*]] = load i32 (i32)*, i32 (i32)** %[[F_ADDR]] ++; CHECK-LLVM: %[[CALL:.*]] = call spir_func i32 %[[F_LOADED]] ++; CHECK-LLVM: ret i32 %[[CALL]] ++; ++; CHECK-LLVM: define spir_kernel void @test ++; CHECK-LLVM: %[[FP:.*]] = alloca i32 (i32)* ++; CHECK-LLVM: store i32 (i32)* @foo, i32 (i32)** %[[FP]] ++; CHECK-LLVM: store i32 (i32)* @bar, i32 (i32)** %[[FP]] ++; CHECK-LLVM: %[[FP_LOADED:.*]] = load i32 (i32)*, i32 (i32)** %[[FP]] ++; CHECK-LLVM: call spir_func i32 @helper(i32 (i32)* %[[FP_LOADED]] ++ ++ ++target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir64-unknown-unknown" ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_func i32 @helper(i32 (i32)* %f, i32 %arg) #0 { ++entry: ++ %f.addr = alloca i32 (i32)*, align 8 ++ %arg.addr = alloca i32, align 4 ++ store i32 (i32)* %f, i32 (i32)** %f.addr, align 8 ++ store i32 %arg, i32* %arg.addr, align 4 ++ %0 = load i32 (i32)*, i32 (i32)** %f.addr, align 8 ++ %1 = load i32, i32* %arg.addr, align 4 ++ %call = call spir_func i32 %0(i32 %1) #3 ++ ret i32 %call ++} ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_func i32 @foo(i32 %v) #0 { ++entry: ++ %v.addr = alloca i32, align 4 ++ store i32 %v, i32* %v.addr, align 4 ++ %0 = load i32, i32* %v.addr, align 4 ++ %add = add nsw i32 %0, 1 ++ ret i32 %add ++} ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_func i32 @bar(i32 %v) #0 { ++entry: ++ %v.addr = alloca i32, align 4 ++ store i32 %v, i32* %v.addr, align 4 ++ %0 = load i32, i32* %v.addr, align 4 ++ %add = add nsw i32 %0, 2 ++ ret i32 %add ++} ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_kernel void @test(i32 addrspace(1)* %data, i32 %control) #1 !kernel_arg_addr_space !1 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { ++entry: ++ %data.addr = alloca i32 addrspace(1)*, align 8 ++ %control.addr = alloca i32, align 4 ++ %fp = alloca i32 (i32)*, align 8 ++ store i32 addrspace(1)* %data, i32 addrspace(1)** %data.addr, align 8 ++ store i32 %control, i32* %control.addr, align 4 ++ store i32 (i32)* null, i32 (i32)** %fp, align 8 ++ %call = call spir_func i64 @_Z13get_global_idj(i32 0) #4 ++ %0 = load i32, i32* %control.addr, align 4 ++ %conv = sext i32 %0 to i64 ++ %rem = urem i64 %call, %conv ++ %cmp = icmp eq i64 %rem, 0 ++ br i1 %cmp, label %if.then, label %if.else ++ ++if.then: ; preds = %entry ++ store i32 (i32)* @foo, i32 (i32)** %fp, align 8 ++ br label %if.end ++ ++if.else: ; preds = %entry ++ store i32 (i32)* @bar, i32 (i32)** %fp, align 8 ++ br label %if.end ++ ++if.end: ; preds = %if.else, %if.then ++ %1 = load i32 (i32)*, i32 (i32)** %fp, align 8 ++ %2 = load i32 addrspace(1)*, i32 addrspace(1)** %data.addr, align 8 ++ %call2 = call spir_func i64 @_Z13get_global_idj(i32 0) #4 ++ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %2, i64 %call2 ++ %3 = load i32, i32 addrspace(1)* %arrayidx, align 4 ++ %call3 = call spir_func i32 @helper(i32 (i32)* %1, i32 %3) #3 ++ %4 = load i32 addrspace(1)*, i32 addrspace(1)** %data.addr, align 8 ++ %call4 = call spir_func i64 @_Z13get_global_idj(i32 0) #4 ++ %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %4, i64 %call4 ++ store i32 %call3, i32 addrspace(1)* %arrayidx5, align 4 ++ ret void ++} ++ ++; Function Attrs: convergent nounwind readnone ++declare spir_func i64 @_Z13get_global_idj(i32) #2 ++ ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #2 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #3 = { convergent } ++attributes #4 = { convergent nounwind readnone } ++ ++!llvm.module.flags = !{!0} ++!opencl.ocl.version = !{!1} ++!opencl.spir.version = !{!2} ++!llvm.ident = !{!3} ++ ++!0 = !{i32 1, !"wchar_size", i32 4} ++!1 = !{i32 1, i32 0} ++!2 = !{i32 1, i32 2} ++!3 = !{!"clang version 7.1.0 "} ++!4 = !{!"none", !"none"} ++!5 = !{!"int*", !"int"} ++!6 = !{!"", !""} +diff --git a/test/transcoding/SPV_INTEL_function_pointers/function-pointer.ll b/test/transcoding/SPV_INTEL_function_pointers/function-pointer.ll +new file mode 100644 +index 0000000..febaa3c +--- /dev/null ++++ b/test/transcoding/SPV_INTEL_function_pointers/function-pointer.ll +@@ -0,0 +1,91 @@ ++; RUN: llvm-as %s -o %t.bc ++; RUN: llvm-spirv %t.bc -spirv-text -o %t.spt ++; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ++; RUN: llvm-spirv %t.bc -o %t.spv ++; RUN: llvm-spirv -r %t.spv -o %t.r.bc ++; RUN: llvm-dis %t.r.bc -o %t.r.ll ++; RUN: FileCheck < %t.r.ll %s --check-prefix=CHECK-LLVM ++; ++; Generated from: ++; int foo(int arg) { ++; return arg + 10; ++; } ++; ++; void __kernel test(__global int *data, int input) { ++; int (__constant *fp)(int) = &foo; ++; ++; *data = fp(input); ++; } ++; ++; CHECK-SPIRV: Capability FunctionPointersINTEL ++; CHECK-SPIRV: EntryPoint 6 [[KERNEL_ID:[0-9]+]] "test" ++; CHECK-SPIRV: TypeInt [[TYPE_INT_ID:[0-9]+]] ++; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[TYPE_INT_ID]] [[TYPE_INT_ID]] ++; CHECK-SPIRV: TypePointer [[FOO_PTR_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] ++; CHECK-SPIRV: TypePointer [[FOO_PTR_ALLOCA_ID:[0-9]+]] 7 [[FOO_PTR_ID]] ++; ++; CHECK-SPIRV: Function {{[0-9]+}} [[FOO_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] ++; CHECK-SPIRV: Function {{[0-9]+}} [[KERNEL_ID]] ++; CHECK-SPIRV: Variable [[FOO_PTR_ALLOCA_ID]] [[FOO_PTR_ALLOCA:[0-9]+]] ++; CHECK-SPIRV: FunctionPointerINTEL [[FOO_PTR_ID]] [[FOO_PTR:[0-9]+]] [[FOO_ID]] ++; CHECK-SPIRV: Store [[FOO_PTR_ALLOCA]] [[FOO_PTR]] ++; CHECK-SPIRV: Load [[FOO_PTR_ID]] [[LOADED_FOO_PTR:[0-9]+]] [[FOO_PTR_ALLOCA]] ++; CHECK-SPIRV: FunctionPointerCallINTEL 2 {{[0-9]+}} [[LOADED_FOO_PTR]] ++; ++; CHECK-LLVM: define spir_kernel void @test ++; CHECK-LLVM: %fp = alloca i32 (i32)* ++; CHECK-LLVM: store i32 (i32)* @foo, i32 (i32)** %fp ++; CHECK-LLVM: %0 = load i32 (i32)*, i32 (i32)** %fp ++; CHECK-LLVM: %call = call spir_func i32 %0(i32 %1) ++ ++target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir64-unknown-unknown" ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_func i32 @foo(i32 %arg) #0 { ++entry: ++ %arg.addr = alloca i32, align 4 ++ store i32 %arg, i32* %arg.addr, align 4 ++ %0 = load i32, i32* %arg.addr, align 4 ++ %add = add nsw i32 %0, 10 ++ ret i32 %add ++} ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_kernel void @test(i32 addrspace(1)* %data, i32 %input) #1 !kernel_arg_addr_space !1 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { ++entry: ++ %data.addr = alloca i32 addrspace(1)*, align 8 ++ %input.addr = alloca i32, align 4 ++ %fp = alloca i32 (i32)*, align 8 ++ store i32 addrspace(1)* %data, i32 addrspace(1)** %data.addr, align 8 ++ store i32 %input, i32* %input.addr, align 4 ++ store i32 (i32)* @foo, i32 (i32)** %fp, align 8 ++ %0 = load i32 (i32)*, i32 (i32)** %fp, align 8 ++ %1 = load i32, i32* %input.addr, align 4 ++ %call = call spir_func i32 %0(i32 %1) #2 ++ %2 = load i32 addrspace(1)*, i32 addrspace(1)** %data.addr, align 8 ++ store i32 %call, i32 addrspace(1)* %2, align 4 ++ ret void ++} ++ ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #2 = { convergent } ++ ++!llvm.module.flags = !{!0} ++!opencl.ocl.version = !{!1} ++!opencl.spir.version = !{!2} ++!opencl.used.extensions = !{!3} ++!opencl.used.optional.core.features = !{!3} ++!opencl.compiler.options = !{!3} ++!llvm.ident = !{!4} ++ ++!0 = !{i32 1, !"wchar_size", i32 4} ++!1 = !{i32 1, i32 0} ++!2 = !{i32 1, i32 2} ++!3 = !{} ++!4 = !{!"clang version 7.0.0 "} ++!5 = !{!"none", !"none"} ++!6 = !{!"int*", !"int"} ++!7 = !{!"", !""} ++ +diff --git a/test/transcoding/SPV_INTEL_function_pointers/non-uniform-function-pointer.ll b/test/transcoding/SPV_INTEL_function_pointers/non-uniform-function-pointer.ll +new file mode 100644 +index 0000000..30521cd +--- /dev/null ++++ b/test/transcoding/SPV_INTEL_function_pointers/non-uniform-function-pointer.ll +@@ -0,0 +1,138 @@ ++; RUN: llvm-as %s -o %t.bc ++; RUN: llvm-spirv %t.bc -spirv-text -o %t.spt ++; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ++; RUN: llvm-spirv %t.bc -o %t.spv ++; RUN: llvm-spirv -r %t.spv -o %t.r.bc ++; RUN: llvm-dis %t.r.bc -o %t.r.ll ++; RUN: FileCheck < %t.r.ll %s --check-prefix=CHECK-LLVM ++; ++; Generated from: ++; int foo(int v) { ++; return v + 1; ++; } ++; ++; int bar(int v) { ++; return v + 2; ++; } ++; ++; __kernel void test(__global int *data, int control) { ++; int (*fp)(int) = 0; ++; ++; if (get_global_id(0) % control == 0) ++; fp = &foo; ++; else ++; fp = &bar; ++; ++; data[get_global_id(0)] = fp(data[get_global_id(0)]); ++; } ++; ++; CHECK-SPIRV: Capability FunctionPointersINTEL ++; ++; CHECK-SPIRV: EntryPoint 6 [[KERNEL_ID:[0-9]+]] "test" ++; CHECK-SPIRV: TypeInt [[TYPE_INT32_ID:[0-9+]]] 32 ++; CHECK-SPIRV: TypeFunction [[FOO_TYPE_ID:[0-9]+]] [[TYPE_INT32_ID]] [[TYPE_INT32_ID]] ++; CHECK-SPIRV: TypePointer [[FOO_PTR_TYPE_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] ++; CHECK-SPIRV: TypePointer [[FOO_PTR_ALLOCA_TYPE_ID:[0-9]+]] 7 [[FOO_PTR_TYPE_ID]] ++; ++; CHECK-SPIRV: Function {{[0-9]+}} [[FOO_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] ++; CHECK-SPIRV: Function {{[0-9]+}} [[BAR_ID:[0-9]+]] {{[0-9]+}} [[FOO_TYPE_ID]] ++; ++; CHECK-SPIRV: Function {{[0-9]+}} [[KERNEL_ID]] ++; CHECK-SPIRV: Variable [[FOO_PTR_ALLOCA_TYPE_ID]] [[FOO_PTR_ALLOCA_ID:[0-9]+]] ++; CHECK-SPIRV: FunctionPointerINTEL [[FOO_PTR_TYPE_ID]] [[FOO_PTR_ID:[0-9]+]] [[FOO_ID]] ++; CHECK-SPIRV: Store [[FOO_PTR_ALLOCA_ID]] [[FOO_PTR_ID]] ++; CHECK-SPIRV: FunctionPointerINTEL [[FOO_PTR_TYPE_ID]] [[BAR_PTR_ID:[0-9]+]] [[BAR_ID]] ++; CHECK-SPIRV: Store [[FOO_PTR_ALLOCA_ID]] [[BAR_PTR_ID]] ++; CHECK-SPIRV: Load [[FOO_PTR_TYPE_ID]] [[LOADED_FOO_PTR:[0-9]+]] [[FOO_PTR_ALLOCA_ID]] ++; CHECK-SPIRV: FunctionPointerCallINTEL {{[0-9]+}} {{[0-9]+}} [[LOADED_FOO_PTR]] ++; ++; CHECK-LLVM: define spir_kernel void @test ++; CHECK-LLVM: %fp = alloca i32 (i32)* ++; CHECK-LLVM: store i32 (i32)* @foo, i32 (i32)** %fp ++; CHECK-LLVM: store i32 (i32)* @bar, i32 (i32)** %fp ++; CHECK-LLVM: %[[FP:.*]] = load i32 (i32)*, i32 (i32)** %fp ++; CHECK-LLVM: call spir_func i32 %[[FP]](i32 %{{.*}}) ++ ++ ++target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir64-unknown-unknown" ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_func i32 @foo(i32 %v) #0 { ++entry: ++ %v.addr = alloca i32, align 4 ++ store i32 %v, i32* %v.addr, align 4 ++ %0 = load i32, i32* %v.addr, align 4 ++ %add = add nsw i32 %0, 1 ++ ret i32 %add ++} ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_func i32 @bar(i32 %v) #0 { ++entry: ++ %v.addr = alloca i32, align 4 ++ store i32 %v, i32* %v.addr, align 4 ++ %0 = load i32, i32* %v.addr, align 4 ++ %add = add nsw i32 %0, 2 ++ ret i32 %add ++} ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_kernel void @test(i32 addrspace(1)* %data, i32 %control) #1 !kernel_arg_addr_space !1 !kernel_arg_access_qual !4 !kernel_arg_type !5 !kernel_arg_base_type !5 !kernel_arg_type_qual !6 { ++entry: ++ %data.addr = alloca i32 addrspace(1)*, align 8 ++ %control.addr = alloca i32, align 4 ++ %fp = alloca i32 (i32)*, align 8 ++ store i32 addrspace(1)* %data, i32 addrspace(1)** %data.addr, align 8 ++ store i32 %control, i32* %control.addr, align 4 ++ store i32 (i32)* null, i32 (i32)** %fp, align 8 ++ %call = call spir_func i64 @_Z13get_global_idj(i32 0) #3 ++ %0 = load i32, i32* %control.addr, align 4 ++ %conv = sext i32 %0 to i64 ++ %rem = urem i64 %call, %conv ++ %cmp = icmp eq i64 %rem, 0 ++ br i1 %cmp, label %if.then, label %if.else ++ ++if.then: ; preds = %entry ++ store i32 (i32)* @foo, i32 (i32)** %fp, align 8 ++ br label %if.end ++ ++if.else: ; preds = %entry ++ store i32 (i32)* @bar, i32 (i32)** %fp, align 8 ++ br label %if.end ++ ++if.end: ; preds = %if.else, %if.then ++ %1 = load i32 (i32)*, i32 (i32)** %fp, align 8 ++ %2 = load i32 addrspace(1)*, i32 addrspace(1)** %data.addr, align 8 ++ %call2 = call spir_func i64 @_Z13get_global_idj(i32 0) #3 ++ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %2, i64 %call2 ++ %3 = load i32, i32 addrspace(1)* %arrayidx, align 4 ++ %call3 = call spir_func i32 %1(i32 %3) #4 ++ %4 = load i32 addrspace(1)*, i32 addrspace(1)** %data.addr, align 8 ++ %call4 = call spir_func i64 @_Z13get_global_idj(i32 0) #3 ++ %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %4, i64 %call4 ++ store i32 %call3, i32 addrspace(1)* %arrayidx5, align 4 ++ ret void ++} ++ ++; Function Attrs: convergent nounwind readnone ++declare spir_func i64 @_Z13get_global_idj(i32) #2 ++ ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #2 = { convergent nounwind readnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #3 = { convergent nounwind readnone } ++attributes #4 = { convergent } ++ ++!llvm.module.flags = !{!0} ++!opencl.ocl.version = !{!1} ++!opencl.spir.version = !{!2} ++!llvm.ident = !{!3} ++ ++!0 = !{i32 1, !"wchar_size", i32 4} ++!1 = !{i32 1, i32 0} ++!2 = !{i32 1, i32 2} ++!3 = !{!"clang version 7.1.0 "} ++!4 = !{!"none", !"none"} ++!5 = !{!"int*", !"int"} ++!6 = !{!"", !""} +diff --git a/test/transcoding/SPV_INTEL_function_pointers/referenced-indirectly.ll b/test/transcoding/SPV_INTEL_function_pointers/referenced-indirectly.ll +new file mode 100644 +index 0000000..bd435c1 +--- /dev/null ++++ b/test/transcoding/SPV_INTEL_function_pointers/referenced-indirectly.ll +@@ -0,0 +1,81 @@ ++; RUN: llvm-as %s -o %t.bc ++; RUN: llvm-spirv %t.bc -spirv-text -o %t.spt ++; RUN: FileCheck < %t.spt %s --check-prefix=CHECK-SPIRV ++; RUN: llvm-spirv %t.bc -o %t.spv ++; RUN: llvm-spirv -r %t.spv -o %t.r.bc ++; RUN: llvm-dis %t.r.bc -o %t.r.ll ++; RUN: FileCheck < %t.r.ll %s --check-prefix=CHECK-LLVM ++; ++; Generated from: ++; __attribute__((referenced_indirectly)) ++; int foo(int arg) { ++; return arg + 10; ++; } ++; ++; void __kernel test(__global int *data, int input) { ++; int (__constant *fp)(int) = &foo; ++; ++; *data = fp(input); ++; } ++; ++; CHECK-SPIRV: Capability FunctionPointersINTEL ++; CHECK-SPIRV: Capability IndirectReferencesINTEL ++; ++; CHECK-SPIRV: Name [[FOO_ID:[0-9]+]] "foo" ++; CHECK-SPIRV: Decorate [[FOO_ID]] ReferencedIndirectlyINTEL ++; CHECK-SPIRV: Function {{[0-9]+}} [[FOO_ID]] ++; ++; CHECK-LLVM: define spir_func i32 @foo(i32 %arg) #[[ATTRS:[0-9]+]] ++; CHECK-LLVM: attributes #[[ATTRS]] = {{.*}} "referenced-indirectly" ++ ++target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir64-unknown-unknown" ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_func i32 @foo(i32 %arg) #0 { ++entry: ++ %arg.addr = alloca i32, align 4 ++ store i32 %arg, i32* %arg.addr, align 4 ++ %0 = load i32, i32* %arg.addr, align 4 ++ %add = add nsw i32 %0, 10 ++ ret i32 %add ++} ++ ++; Function Attrs: convergent noinline nounwind optnone ++define spir_kernel void @test(i32 addrspace(1)* %data, i32 %input) #1 !kernel_arg_addr_space !1 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 { ++entry: ++ %data.addr = alloca i32 addrspace(1)*, align 8 ++ %input.addr = alloca i32, align 4 ++ %fp = alloca i32 (i32)*, align 8 ++ store i32 addrspace(1)* %data, i32 addrspace(1)** %data.addr, align 8 ++ store i32 %input, i32* %input.addr, align 4 ++ store i32 (i32)* @foo, i32 (i32)** %fp, align 8 ++ %0 = load i32 (i32)*, i32 (i32)** %fp, align 8 ++ %1 = load i32, i32* %input.addr, align 4 ++ %call = call spir_func i32 %0(i32 %1) #2 ++ %2 = load i32 addrspace(1)*, i32 addrspace(1)** %data.addr, align 8 ++ store i32 %call, i32 addrspace(1)* %2, align 4 ++ ret void ++} ++ ++attributes #0 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" "referenced-indirectly" } ++attributes #1 = { convergent noinline nounwind optnone "correctly-rounded-divide-sqrt-fp-math"="false" "denorms-are-zero"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "uniform-work-group-size"="true" "unsafe-fp-math"="false" "use-soft-float"="false" } ++attributes #2 = { convergent } ++ ++!llvm.module.flags = !{!0} ++!opencl.ocl.version = !{!1} ++!opencl.spir.version = !{!2} ++!opencl.used.extensions = !{!3} ++!opencl.used.optional.core.features = !{!3} ++!opencl.compiler.options = !{!3} ++!llvm.ident = !{!4} ++ ++!0 = !{i32 1, !"wchar_size", i32 4} ++!1 = !{i32 1, i32 0} ++!2 = !{i32 1, i32 2} ++!3 = !{} ++!4 = !{!"clang version 7.0.0 "} ++!5 = !{!"none", !"none"} ++!6 = !{!"int*", !"int"} ++!7 = !{!"", !""} ++ +-- +2.17.1 + diff --git a/patches/spirv/0008-Support-lowering-of-constant-expressions-vector.patch b/patches/spirv/0008-Support-lowering-of-constant-expressions-vector.patch new file mode 100644 index 00000000..c92bd41c --- /dev/null +++ b/patches/spirv/0008-Support-lowering-of-constant-expressions-vector.patch @@ -0,0 +1,215 @@ +From a8176d6a4172c7233055cd1b62bb2e606e4edcd4 Mon Sep 17 00:00:00 2001 +From: Aleksandr Bezzubikov +Date: Sat, 18 Apr 2020 00:44:26 +0300 +Subject: [PATCH 08/10] Support lowering of constant expressions vector + +Since the translator has to lower LLVM constant expressions to be able to represent them in SPIR-V, +we now need to support not only single constexprs but vectors of constexprs as well. +--- + lib/SPIRV/SPIRVLowerConstExpr.cpp | 64 +++++++++++------ + test/constexpr_vector.ll | 115 ++++++++++++++++++++++++++++++ + 2 files changed, 157 insertions(+), 22 deletions(-) + create mode 100644 test/constexpr_vector.ll + +diff --git a/lib/SPIRV/SPIRVLowerConstExpr.cpp b/lib/SPIRV/SPIRVLowerConstExpr.cpp +index 730f0d9..0c7d04d 100644 +--- a/lib/SPIRV/SPIRVLowerConstExpr.cpp ++++ b/lib/SPIRV/SPIRVLowerConstExpr.cpp +@@ -125,31 +125,51 @@ void SPIRVLowerConstExpr::visit(Module *M) { + auto FBegin = I.begin(); + while (!WorkList.empty()) { + auto II = WorkList.front(); +- WorkList.pop_front(); +- for (unsigned OI = 0, OE = II->getNumOperands(); OI != OE; ++OI) { +- auto Op = II->getOperand(OI); + +- if (auto CE = dyn_cast(Op)) { +- SPIRVDBG(dbgs() << "[lowerConstantExpressions] " << *CE;) +- auto ReplInst = CE->getAsInstruction(); +- auto InsPoint = II->getParent() == &*FBegin ? II : &FBegin->back(); +- ReplInst->insertBefore(InsPoint); +- SPIRVDBG(dbgs() << " -> " << *ReplInst << '\n';) +- WorkList.push_front(ReplInst); +- std::vector Users; +- // Do not replace use during iteration of use. Do it in another loop +- for (auto U : CE->users()) { +- SPIRVDBG(dbgs() +- << "[lowerConstantExpressions] Use: " << *U << '\n';) +- if (auto InstUser = dyn_cast(U)) { +- // Only replace users in scope of current function +- if (InstUser->getParent()->getParent() == &I) +- Users.push_back(InstUser); +- } ++ auto LowerOp = [&II, &FBegin, &I](ConstantExpr *CE) { ++ SPIRVDBG(dbgs() << "[lowerConstantExpressions] " << *CE;) ++ auto ReplInst = CE->getAsInstruction(); ++ auto InsPoint = II->getParent() == &*FBegin ? II : &FBegin->back(); ++ ReplInst->insertBefore(InsPoint); ++ SPIRVDBG(dbgs() << " -> " << *ReplInst << '\n';) ++ std::vector Users; ++ // Do not replace use during iteration of use. Do it in another loop ++ for (auto U : CE->users()) { ++ SPIRVDBG(dbgs() << "[lowerConstantExpressions] Use: " << *U << '\n';) ++ if (auto InstUser = dyn_cast(U)) { ++ // Only replace users in scope of current function ++ if (InstUser->getParent()->getParent() == &I) ++ Users.push_back(InstUser); + } +- for (auto &User : Users) +- User->replaceUsesOfWith(CE, ReplInst); + } ++ for (auto &User : Users) ++ User->replaceUsesOfWith(CE, ReplInst); ++ return ReplInst; ++ }; ++ ++ WorkList.pop_front(); ++ for (unsigned OI = 0, OE = II->getNumOperands(); OI != OE; ++OI) { ++ auto Op = II->getOperand(OI); ++ auto *Vec = dyn_cast(Op); ++ if (Vec && std::all_of(Vec->op_begin(), Vec->op_end(), ++ [](Value *V) { return isa(V); })) { ++ // Expand a vector of constexprs and construct it back with series of ++ // insertelement instructions ++ std::list ReplList; ++ std::transform( ++ Vec->op_begin(), Vec->op_end(), std::back_inserter(ReplList), ++ [LowerOp](Value *V) { return LowerOp(cast(V)); }); ++ Value *Repl = nullptr; ++ unsigned Idx = 0; ++ for (auto V : ReplList) ++ Repl = InsertElementInst::Create( ++ (Repl ? Repl : UndefValue::get(Vec->getType())), V, ++ ConstantInt::get(Type::getInt32Ty(M->getContext()), Idx++), "", ++ II); ++ II->replaceUsesOfWith(Op, Repl); ++ WorkList.splice(WorkList.begin(), ReplList); ++ } else if (auto CE = dyn_cast(Op)) ++ WorkList.push_front(LowerOp(CE)); + } + } + } +diff --git a/test/constexpr_vector.ll b/test/constexpr_vector.ll +new file mode 100644 +index 0000000..6df71e2 +--- /dev/null ++++ b/test/constexpr_vector.ll +@@ -0,0 +1,115 @@ ++; RUN: llvm-as < %s | llvm-spirv -spirv-mem2reg=false -s | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM ++ ++; CHECK-LLVM: define dllexport void @vadd() { ++; CHECK-LLVM-NEXT: entry: ++; CHECK-LLVM-NEXT: %Funcs = alloca <16 x i8>, align 16 ++; CHECK-LLVM-NEXT: %0 = ptrtoint i32 (i32)* @_Z2f1u2CMvb32_j to i64 ++; CHECK-LLVM-NEXT: %1 = bitcast i64 %0 to <8 x i8> ++; CHECK-LLVM-NEXT: %2 = extractelement <8 x i8> %1, i32 0 ++; CHECK-LLVM-NEXT: %3 = extractelement <8 x i8> %1, i32 1 ++; CHECK-LLVM-NEXT: %4 = extractelement <8 x i8> %1, i32 2 ++; CHECK-LLVM-NEXT: %5 = extractelement <8 x i8> %1, i32 3 ++; CHECK-LLVM-NEXT: %6 = extractelement <8 x i8> %1, i32 4 ++; CHECK-LLVM-NEXT: %7 = extractelement <8 x i8> %1, i32 5 ++; CHECK-LLVM-NEXT: %8 = extractelement <8 x i8> %1, i32 6 ++; CHECK-LLVM-NEXT: %9 = extractelement <8 x i8> %1, i32 7 ++; CHECK-LLVM-NEXT: %10 = ptrtoint i32 (i32)* @_Z2f2u2CMvb32_j to i64 ++; CHECK-LLVM-NEXT: %11 = bitcast i64 %10 to <8 x i8> ++; CHECK-LLVM-NEXT: %12 = extractelement <8 x i8> %11, i32 0 ++; CHECK-LLVM-NEXT: %13 = extractelement <8 x i8> %11, i32 1 ++; CHECK-LLVM-NEXT: %14 = extractelement <8 x i8> %11, i32 2 ++; CHECK-LLVM-NEXT: %15 = extractelement <8 x i8> %11, i32 3 ++; CHECK-LLVM-NEXT: %16 = extractelement <8 x i8> %11, i32 4 ++; CHECK-LLVM-NEXT: %17 = extractelement <8 x i8> %11, i32 5 ++; CHECK-LLVM-NEXT: %18 = extractelement <8 x i8> %11, i32 6 ++; CHECK-LLVM-NEXT: %19 = extractelement <8 x i8> %11, i32 7 ++; CHECK-LLVM-NEXT: %20 = insertelement <16 x i8> undef, i8 %2, i32 0 ++; CHECK-LLVM-NEXT: %21 = insertelement <16 x i8> %20, i8 %3, i32 1 ++; CHECK-LLVM-NEXT: %22 = insertelement <16 x i8> %21, i8 %4, i32 2 ++; CHECK-LLVM-NEXT: %23 = insertelement <16 x i8> %22, i8 %5, i32 3 ++; CHECK-LLVM-NEXT: %24 = insertelement <16 x i8> %23, i8 %6, i32 4 ++; CHECK-LLVM-NEXT: %25 = insertelement <16 x i8> %24, i8 %7, i32 5 ++; CHECK-LLVM-NEXT: %26 = insertelement <16 x i8> %25, i8 %8, i32 6 ++; CHECK-LLVM-NEXT: %27 = insertelement <16 x i8> %26, i8 %9, i32 7 ++; CHECK-LLVM-NEXT: %28 = insertelement <16 x i8> %27, i8 %12, i32 8 ++; CHECK-LLVM-NEXT: %29 = insertelement <16 x i8> %28, i8 %13, i32 9 ++; CHECK-LLVM-NEXT: %30 = insertelement <16 x i8> %29, i8 %14, i32 10 ++; CHECK-LLVM-NEXT: %31 = insertelement <16 x i8> %30, i8 %15, i32 11 ++; CHECK-LLVM-NEXT: %32 = insertelement <16 x i8> %31, i8 %16, i32 12 ++; CHECK-LLVM-NEXT: %33 = insertelement <16 x i8> %32, i8 %17, i32 13 ++; CHECK-LLVM-NEXT: %34 = insertelement <16 x i8> %33, i8 %18, i32 14 ++; CHECK-LLVM-NEXT: %35 = insertelement <16 x i8> %34, i8 %19, i32 15 ++; CHECK-LLVM-NEXT: store <16 x i8> %35, <16 x i8>* %Funcs, align 16 ++; CHECK-LLVM-NEXT: ret void ++; CHECK-LLVM-NEXT: } ++ ++; RUN: llvm-as < %s | llvm-spirv -spirv-text -spirv-mem2reg=false | FileCheck %s --check-prefix=CHECK-SPIRV ++ ++; CHECK-SPIRV: 4 Name [[Funcs:[0-9]+]] "Funcs" ++ ++; CHECK-SPIRV-DAG: 4 TypeInt [[TypeInt8:[0-9]+]] 8 0 ++; CHECK-SPIRV-DAG: 4 TypeInt [[TypeInt32:[0-9]+]] 32 0 ++; CHECK-SPIRV-DAG: 4 TypeInt [[TypeInt64:[0-9]+]] 64 0 ++; CHECK-SPIRV-DAG: 4 TypeVector [[TypeVec16:[0-9]+]] [[TypeInt8]] 16 ++; CHECK-SPIRV-DAG: 4 TypeVector [[TypeVec8:[0-9]+]] [[TypeInt8]] 8 ++; CHECK-SPIRV-DAG: 4 TypePointer [[StorePtr:[0-9]+]] 7 [[TypeVec16]] ++; CHECK-SPIRV-DAG: 3 Undef [[TypeVec16]] [[TypeUndef:[0-9]+]] ++ ++; CHECK-SPIRV: 4 ConvertPtrToU [[TypeInt64]] [[Ptr1:[0-9]+]] {{[0-9]+}} ++; CHECK-SPIRV-NEXT: 4 Bitcast [[TypeVec8]] [[Vec1:[0-9]+]] [[Ptr1]] ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v00:[0-9]+]] [[Vec1]] 0 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v01:[0-9]+]] [[Vec1]] 1 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v02:[0-9]+]] [[Vec1]] 2 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v03:[0-9]+]] [[Vec1]] 3 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v04:[0-9]+]] [[Vec1]] 4 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v05:[0-9]+]] [[Vec1]] 5 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v06:[0-9]+]] [[Vec1]] 6 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v07:[0-9]+]] [[Vec1]] 7 ++; CHECK-SPIRV-NEXT: 4 ConvertPtrToU [[TypeInt64]] [[Ptr2:[0-9]+]] {{[0-9]+}} ++; CHECK-SPIRV-NEXT: 4 Bitcast [[TypeVec8]] [[Vec2:[0-9]+]] [[Ptr2]] ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v10:[0-9]+]] [[Vec2]] 0 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v11:[0-9]+]] [[Vec2]] 1 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v12:[0-9]+]] [[Vec2]] 2 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v13:[0-9]+]] [[Vec2]] 3 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v14:[0-9]+]] [[Vec2]] 4 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v15:[0-9]+]] [[Vec2]] 5 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v16:[0-9]+]] [[Vec2]] 6 ++; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v17:[0-9]+]] [[Vec2]] 7 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec0:[0-9]+]] [[v00]] [[TypeUndef]] 0 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec1:[0-9]+]] [[v01]] [[NewVec0]] 1 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec2:[0-9]+]] [[v02]] [[NewVec1]] 2 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec3:[0-9]+]] [[v03]] [[NewVec2]] 3 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec4:[0-9]+]] [[v04]] [[NewVec3]] 4 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec5:[0-9]+]] [[v05]] [[NewVec4]] 5 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec6:[0-9]+]] [[v06]] [[NewVec5]] 6 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec7:[0-9]+]] [[v07]] [[NewVec6]] 7 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec8:[0-9]+]] [[v10]] [[NewVec7]] 8 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec9:[0-9]+]] [[v11]] [[NewVec8]] 9 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec10:[0-9]+]] [[v12]] [[NewVec9]] 10 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec11:[0-9]+]] [[v13]] [[NewVec10]] 11 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec12:[0-9]+]] [[v14]] [[NewVec11]] 12 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec13:[0-9]+]] [[v15]] [[NewVec12]] 13 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec14:[0-9]+]] [[v16]] [[NewVec13]] 14 ++; CHECK-SPIRV-NEXT: 6 CompositeInsert [[TypeVec16]] [[NewVec15:[0-9]+]] [[v17]] [[NewVec14]] 15 ++; CHECK-SPIRV-NEXT: 5 Store [[Funcs]] [[NewVec15]] [[TypeInt32]] [[StorePtr]] ++ ++target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir-unknown-unknown" ++ ++; Function Attrs: noinline norecurse nounwind readnone ++define internal i32 @_Z2f1u2CMvb32_j(i32 %x) { ++entry: ++ ret i32 %x ++} ++; Function Attrs: noinline norecurse nounwind readnone ++define internal i32 @_Z2f2u2CMvb32_j(i32 %x) { ++entry: ++ ret i32 %x ++} ++; Function Attrs: noinline nounwind ++define dllexport void @vadd() { ++entry: ++ %Funcs = alloca <16 x i8>, align 16 ++ store <16 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 7), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 7)>, <16 x i8>* %Funcs, align 16 ++ ret void ++} +-- +2.17.1 + diff --git a/patches/spirv/0009-Support-function-pointers-in-cast-instructions.patch b/patches/spirv/0009-Support-function-pointers-in-cast-instructions.patch new file mode 100644 index 00000000..0f8046d1 --- /dev/null +++ b/patches/spirv/0009-Support-function-pointers-in-cast-instructions.patch @@ -0,0 +1,98 @@ +From 4f791674ee90444075d1b05c1553da23536c902a Mon Sep 17 00:00:00 2001 +From: Aleksandr Bezzubikov +Date: Thu, 30 Apr 2020 22:19:20 +0300 +Subject: [PATCH 09/10] Support function pointers in cast instructions + +--- + lib/SPIRV/SPIRVWriter.cpp | 12 +++++++++++- + test/DebugInfo/DebugValueInvalid.ll | 6 ------ + test/constexpr_vector.ll | 10 +++++++--- + 3 files changed, 18 insertions(+), 10 deletions(-) + +diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp +index df6f070..7fccf34 100644 +--- a/lib/SPIRV/SPIRVWriter.cpp ++++ b/lib/SPIRV/SPIRVWriter.cpp +@@ -684,6 +684,7 @@ SPIRVInstruction *LLVMToSPIRV::transCmpInst(CmpInst *Cmp, SPIRVBasicBlock *BB) { + SPIRV::SPIRVInstruction *LLVMToSPIRV::transUnaryInst(UnaryInstruction *U, + SPIRVBasicBlock *BB) { + Op BOC = OpNop; ++ SPIRVValue *Op = nullptr; + if (auto Cast = dyn_cast(U)) { + if (Cast->getDestTy()->getPointerAddressSpace() == SPIRAS_Generic) { + assert(Cast->getSrcTy()->getPointerAddressSpace() != SPIRAS_Constant && +@@ -698,9 +699,18 @@ SPIRV::SPIRVInstruction *LLVMToSPIRV::transUnaryInst(UnaryInstruction *U, + } else { + auto OpCode = U->getOpcode(); + BOC = OpCodeMap::map(OpCode); ++ ++ if (Function *F = dyn_cast(U->getOperand(0))) { ++ assert(BOC == OpConvertPtrToU && ++ "Illegal unary operation on function pointer"); ++ Op = BM->addFunctionPointerINTELInst( ++ transType(F->getType()), ++ static_cast(transValue(F, BB)), BB); ++ } + } + +- auto Op = transValue(U->getOperand(0), BB); ++ if (!Op) ++ Op = transValue(U->getOperand(0), BB); + return BM->addUnaryInst(transBoolOpCode(Op, BOC), transType(U->getType()), Op, + BB); + } +diff --git a/test/DebugInfo/DebugValueInvalid.ll b/test/DebugInfo/DebugValueInvalid.ll +index d58eece..01dd4d1 100644 +--- a/test/DebugInfo/DebugValueInvalid.ll ++++ b/test/DebugInfo/DebugValueInvalid.ll +@@ -54,12 +54,6 @@ entry: + %call = call spir_func %opencl.queue_t* @_Z17get_default_queuev() #6, !dbg !54 + ; CHECK: InBoundsPtrAccessChain + ; CHECK: Store +-; SPIRVLowerOCLBlocks pass has removed bitcast and addrspacecast, because their operands are function pointers. +-; CHECK-NOT: BitCast +-; CHECK-NOT: PtrCastToGeneric +-; The result of these casts was used by llvm.dbg.value. This intrinsic becomes invalid, +-; so we do not translate it and its debug location +-; CHECK-NOT: DebugValue + ; CHECK: GetDefaultQueue + + %3 = bitcast %struct.ndrange_t* %tmp to i8*, !dbg !55 +diff --git a/test/constexpr_vector.ll b/test/constexpr_vector.ll +index 6df71e2..d441429 100644 +--- a/test/constexpr_vector.ll ++++ b/test/constexpr_vector.ll +@@ -45,7 +45,9 @@ + + ; RUN: llvm-as < %s | llvm-spirv -spirv-text -spirv-mem2reg=false | FileCheck %s --check-prefix=CHECK-SPIRV + +-; CHECK-SPIRV: 4 Name [[Funcs:[0-9]+]] "Funcs" ++; CHECK-SPIRV-DAG: 4 Name [[Funcs:[0-9]+]] "Funcs" ++; CHECK-SPIRV-DAG: 6 Name [[F1:[0-9+]]] "_Z2f1u2CMvb32_j" ++; CHECK-SPIRV-DAG: 6 Name [[F2:[0-9+]]] "_Z2f2u2CMvb32_j" + + ; CHECK-SPIRV-DAG: 4 TypeInt [[TypeInt8:[0-9]+]] 8 0 + ; CHECK-SPIRV-DAG: 4 TypeInt [[TypeInt32:[0-9]+]] 32 0 +@@ -55,7 +57,8 @@ + ; CHECK-SPIRV-DAG: 4 TypePointer [[StorePtr:[0-9]+]] 7 [[TypeVec16]] + ; CHECK-SPIRV-DAG: 3 Undef [[TypeVec16]] [[TypeUndef:[0-9]+]] + +-; CHECK-SPIRV: 4 ConvertPtrToU [[TypeInt64]] [[Ptr1:[0-9]+]] {{[0-9]+}} ++; CHECK-SPIRV: 4 FunctionPointerINTEL [[FuncPtrTy:[0-9]+]] [[F1Ptr:[0-9]+]] [[F1]] ++; CHECK-SPIRV-NEXT: 4 ConvertPtrToU [[TypeInt64]] [[Ptr1:[0-9]+]] [[F1Ptr]] + ; CHECK-SPIRV-NEXT: 4 Bitcast [[TypeVec8]] [[Vec1:[0-9]+]] [[Ptr1]] + ; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v00:[0-9]+]] [[Vec1]] 0 + ; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v01:[0-9]+]] [[Vec1]] 1 +@@ -65,7 +68,8 @@ + ; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v05:[0-9]+]] [[Vec1]] 5 + ; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v06:[0-9]+]] [[Vec1]] 6 + ; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v07:[0-9]+]] [[Vec1]] 7 +-; CHECK-SPIRV-NEXT: 4 ConvertPtrToU [[TypeInt64]] [[Ptr2:[0-9]+]] {{[0-9]+}} ++; CHECK-SPIRV-NEXT: 4 FunctionPointerINTEL [[FuncPtrTy]] [[F2Ptr:[0-9]+]] [[F2]] ++; CHECK-SPIRV-NEXT: 4 ConvertPtrToU [[TypeInt64]] [[Ptr2:[0-9]+]] [[F2Ptr]] + ; CHECK-SPIRV-NEXT: 4 Bitcast [[TypeVec8]] [[Vec2:[0-9]+]] [[Ptr2]] + ; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v10:[0-9]+]] [[Vec2]] 0 + ; CHECK-SPIRV-NEXT: 5 CompositeExtract [[TypeInt8]] [[v11:[0-9]+]] [[Vec2]] 1 +-- +2.17.1 + diff --git a/patches/spirv/0010-Improve-constant-expressions-lowering-for-function-p.patch b/patches/spirv/0010-Improve-constant-expressions-lowering-for-function-p.patch new file mode 100644 index 00000000..48c38403 --- /dev/null +++ b/patches/spirv/0010-Improve-constant-expressions-lowering-for-function-p.patch @@ -0,0 +1,192 @@ +From d13e7857c08beee2143f35b4222e8f5a9f78570e Mon Sep 17 00:00:00 2001 +From: Aleksandr Bezzubikov +Date: Wed, 20 May 2020 01:16:22 +0300 +Subject: [PATCH] Improve constant expressions lowering for function pointers. + +Extend constexprs lowering support to lower constant vector of pure function pointers +w/o any transformations inside. +--- + lib/SPIRV/SPIRVLowerConstExpr.cpp | 27 ++++++++----- + lib/SPIRV/SPIRVWriter.cpp | 14 +++++-- + test/constexpr_vector.ll | 13 ++++-- + .../vector_elem.ll | 40 +++++++++++++++++++ + 4 files changed, 76 insertions(+), 18 deletions(-) + create mode 100644 test/transcoding/SPV_INTEL_function_pointers/vector_elem.ll + +diff --git a/lib/SPIRV/SPIRVLowerConstExpr.cpp b/lib/SPIRV/SPIRVLowerConstExpr.cpp +index 0c7d04d..74d73fd 100644 +--- a/lib/SPIRV/SPIRVLowerConstExpr.cpp ++++ b/lib/SPIRV/SPIRVLowerConstExpr.cpp +@@ -115,7 +115,6 @@ bool SPIRVLowerConstExpr::runOnModule(Module &Module) { + + void SPIRVLowerConstExpr::visit(Module *M) { + for (auto &I : M->functions()) { +- std::map CMap; + std::list WorkList; + for (auto &BI : I) { + for (auto &II : BI) { +@@ -126,7 +125,10 @@ void SPIRVLowerConstExpr::visit(Module *M) { + while (!WorkList.empty()) { + auto II = WorkList.front(); + +- auto LowerOp = [&II, &FBegin, &I](ConstantExpr *CE) { ++ auto LowerOp = [&II, &FBegin, &I](Value *V) -> Value * { ++ if (isa(V)) ++ return V; ++ auto *CE = cast(V); + SPIRVDBG(dbgs() << "[lowerConstantExpressions] " << *CE;) + auto ReplInst = CE->getAsInstruction(); + auto InsPoint = II->getParent() == &*FBegin ? II : &FBegin->back(); +@@ -151,25 +153,30 @@ void SPIRVLowerConstExpr::visit(Module *M) { + for (unsigned OI = 0, OE = II->getNumOperands(); OI != OE; ++OI) { + auto Op = II->getOperand(OI); + auto *Vec = dyn_cast(Op); +- if (Vec && std::all_of(Vec->op_begin(), Vec->op_end(), +- [](Value *V) { return isa(V); })) { ++ if (Vec && std::all_of(Vec->op_begin(), Vec->op_end(), [](Value *V) { ++ return isa(V) || isa(V); ++ })) { + // Expand a vector of constexprs and construct it back with series of + // insertelement instructions +- std::list ReplList; +- std::transform( +- Vec->op_begin(), Vec->op_end(), std::back_inserter(ReplList), +- [LowerOp](Value *V) { return LowerOp(cast(V)); }); ++ std::list OpList; ++ std::transform(Vec->op_begin(), Vec->op_end(), ++ std::back_inserter(OpList), ++ [LowerOp](Value *V) { return LowerOp(V); }); + Value *Repl = nullptr; + unsigned Idx = 0; +- for (auto V : ReplList) ++ std::list ReplList; ++ for (auto V : OpList) { ++ if (auto *Inst = dyn_cast(V)) ++ ReplList.push_back(Inst); + Repl = InsertElementInst::Create( + (Repl ? Repl : UndefValue::get(Vec->getType())), V, + ConstantInt::get(Type::getInt32Ty(M->getContext()), Idx++), "", + II); ++ } + II->replaceUsesOfWith(Op, Repl); + WorkList.splice(WorkList.begin(), ReplList); + } else if (auto CE = dyn_cast(Op)) +- WorkList.push_front(LowerOp(CE)); ++ WorkList.push_front(cast(LowerOp(CE))); + } + } + } +diff --git a/lib/SPIRV/SPIRVWriter.cpp b/lib/SPIRV/SPIRVWriter.cpp +index 7fccf34..8a0ca47 100644 +--- a/lib/SPIRV/SPIRVWriter.cpp ++++ b/lib/SPIRV/SPIRVWriter.cpp +@@ -997,13 +997,19 @@ SPIRVValue *LLVMToSPIRV::transValueWithoutDecoration(Value *V, + + if (auto Ins = dyn_cast(V)) { + auto Index = Ins->getOperand(2); +- if (auto Const = dyn_cast(Index)) ++ if (auto Const = dyn_cast(Index)) { ++ SPIRVValue *InsVal = nullptr; ++ if (auto *F = dyn_cast(Ins->getOperand(1))) { ++ InsVal = BM->addFunctionPointerINTELInst( ++ transType(F->getType()), ++ static_cast(transValue(F, BB)), BB); ++ } else ++ InsVal = transValue(Ins->getOperand(1), BB); + return mapValue(V, BM->addCompositeInsertInst( +- transValue(Ins->getOperand(1), BB), +- transValue(Ins->getOperand(0), BB), ++ InsVal, transValue(Ins->getOperand(0), BB), + std::vector(1, Const->getZExtValue()), + BB)); +- else ++ } else + return mapValue( + V, BM->addVectorInsertDynamicInst(transValue(Ins->getOperand(0), BB), + transValue(Ins->getOperand(1), BB), +diff --git a/test/constexpr_vector.ll b/test/constexpr_vector.ll +index d441429..d74b334 100644 +--- a/test/constexpr_vector.ll ++++ b/test/constexpr_vector.ll +@@ -1,8 +1,7 @@ + ; RUN: llvm-as < %s | llvm-spirv -spirv-mem2reg=false -s | llvm-dis | FileCheck %s --check-prefix=CHECK-LLVM + + ; CHECK-LLVM: define dllexport void @vadd() { +-; CHECK-LLVM-NEXT: entry: +-; CHECK-LLVM-NEXT: %Funcs = alloca <16 x i8>, align 16 ++; CHECK-LLVM: %Funcs = alloca <16 x i8>, align 16 + ; CHECK-LLVM-NEXT: %0 = ptrtoint i32 (i32)* @_Z2f1u2CMvb32_j to i64 + ; CHECK-LLVM-NEXT: %1 = bitcast i64 %0 to <8 x i8> + ; CHECK-LLVM-NEXT: %2 = extractelement <8 x i8> %1, i32 0 +@@ -40,8 +39,12 @@ + ; CHECK-LLVM-NEXT: %34 = insertelement <16 x i8> %33, i8 %18, i32 14 + ; CHECK-LLVM-NEXT: %35 = insertelement <16 x i8> %34, i8 %19, i32 15 + ; CHECK-LLVM-NEXT: store <16 x i8> %35, <16 x i8>* %Funcs, align 16 +-; CHECK-LLVM-NEXT: ret void +-; CHECK-LLVM-NEXT: } ++; CHECK-LLVM: %Funcs1 = alloca <2 x i64>, align 16 ++; CHECK-LLVM-NEXT: %36 = ptrtoint i32 (i32)* @_Z2f1u2CMvb32_j to i64 ++; CHECK-LLVM-NEXT: %37 = ptrtoint i32 (i32)* @_Z2f2u2CMvb32_j to i64 ++; CHECK-LLVM-NEXT: %38 = insertelement <2 x i64> undef, i64 %36, i32 0 ++; CHECK-LLVM-NEXT: %39 = insertelement <2 x i64> %38, i64 %37, i32 1 ++; CHECK-LLVM-NEXT: store <2 x i64> %39, <2 x i64>* %Funcs1, align 16 + + ; RUN: llvm-as < %s | llvm-spirv -spirv-text -spirv-mem2reg=false | FileCheck %s --check-prefix=CHECK-SPIRV + +@@ -115,5 +118,7 @@ define dllexport void @vadd() { + entry: + %Funcs = alloca <16 x i8>, align 16 + store <16 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f1u2CMvb32_j to i64) to <8 x i8>), i32 7), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 0), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 1), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 2), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 3), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 4), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 5), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 6), i8 extractelement (<8 x i8> bitcast (i64 ptrtoint (i32 (i32)* @_Z2f2u2CMvb32_j to i64) to <8 x i8>), i32 7)>, <16 x i8>* %Funcs, align 16 ++ %Funcs1 = alloca <2 x i64>, align 16 ++ store <2 x i64> , <2 x i64>* %Funcs1, align 16 + ret void + } +diff --git a/test/transcoding/SPV_INTEL_function_pointers/vector_elem.ll b/test/transcoding/SPV_INTEL_function_pointers/vector_elem.ll +new file mode 100644 +index 0000000..85fa224 +--- /dev/null ++++ b/test/transcoding/SPV_INTEL_function_pointers/vector_elem.ll +@@ -0,0 +1,40 @@ ++; RUN: llvm-as < %s | llvm-spirv -spirv-text | FileCheck %s --check-prefix=CHECK-SPIRV ++ ++; CHECK-SPIRV-DAG: 6 Name [[F1:[0-9+]]] "_Z2f1u2CMvb32_j" ++; CHECK-SPIRV-DAG: 6 Name [[F2:[0-9+]]] "_Z2f2u2CMvb32_j" ++ ++; CHECK-SPIRV: 4 TypeInt [[TypeInt32:[0-9]+]] 32 0 ++; CHECK-SPIRV: 4 TypeFunction [[TypeFunc:[0-9]+]] [[TypeInt32]] [[TypeInt32]] ++; CHECK-SPIRV: 4 TypePointer [[TypePtr:[0-9]+]] {{[0-9]+}} [[TypeFunc]] ++; CHECK-SPIRV: 4 TypeVector [[TypeVec:[0-9]+]] [[TypePtr]] [[TypeInt32]] ++; CHECK-SPIRV: 3 Undef [[TypeVec]] [[TypeUndef:[0-9]+]] ++ ++; CHECK-SPIRV: 4 FunctionPointerINTEL [[TypePtr]] [[F1Ptr:[0-9]+]] [[F1]] ++; CHECK-SPIRV: 6 CompositeInsert [[TypeVec]] [[NewVec0:[0-9]+]] [[F1Ptr]] [[TypeUndef]] 0 ++; CHECK-SPIRV: 4 FunctionPointerINTEL [[TypePtr]] [[F2Ptr:[0-9]+]] [[F2]] ++; CHECK-SPIRV: 6 CompositeInsert [[TypeVec]] [[NewVec1:[0-9]+]] [[F2Ptr]] [[NewVec0]] 1 ++ ++ ++target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024" ++target triple = "spir-unknown-unknown" ++ ++; Function Attrs: noinline norecurse nounwind readnone ++define internal i32 @_Z2f1u2CMvb32_j(i32 %x) { ++entry: ++ ret i32 %x ++} ++; Function Attrs: noinline norecurse nounwind readnone ++define internal i32 @_Z2f2u2CMvb32_j(i32 %x) { ++entry: ++ ret i32 %x ++} ++ ++; Function Attrs: noinline nounwind ++define dllexport void @vadd() { ++entry: ++ %Funcs = alloca <2 x i32 (i32)*>, align 16 ++ %0 = insertelement <2 x i32 (i32)*> undef, i32 (i32)* @_Z2f1u2CMvb32_j, i32 0 ++ %1 = insertelement <2 x i32 (i32)*> %0, i32 (i32)* @_Z2f2u2CMvb32_j, i32 1 ++ store <2 x i32 (i32)*> %1, <2 x i32 (i32)*>* %Funcs, align 16 ++ ret void ++} +-- +2.17.1 +