From 03bfdf961722b1d46586ac5eabcd402009f5383f Mon Sep 17 00:00:00 2001 From: Eric Schweitz Date: Wed, 16 Oct 2024 11:08:55 -0700 Subject: [PATCH 1/5] [core] Fix various bugs when processing the host-side functions and the (#2268) * Fix various bugs when processing the host-side functions and the native calling conventions. As support for more types is added, it is required that the host side code have the same calling conventions as the native clang++ compiler. Otherwise, programs will simply fail to execute correctly. This is part 1. The launch kernel execution path is broken, and those bugs will be addressed in the next PR. Signed-off-by: Eric Schweitz * Workaround -Werror Signed-off-by: Eric Schweitz * Fix CI Werror issue. Signed-off-by: Eric Schweitz * The requires line didn't work as expected. Script the restriction as a workaround. Signed-off-by: Eric Schweitz * The assets build doesn't respect the "uname -m" check for some unknown reason, so just elide the test completely. Signed-off-by: Eric Schweitz * Review comment. 
Signed-off-by: Eric Schweitz --------- Signed-off-by: Eric Schweitz --- docker/build/assets.Dockerfile | 1 + include/cudaq/Optimizer/Builder/Factory.h | 7 +- lib/Optimizer/Builder/Factory.cpp | 176 ++++++--- .../Transforms/GenKernelExecution.cpp | 330 ++++++----------- lib/Optimizer/Transforms/LiftArrayAlloc.cpp | 6 +- targettests/execution/auto_kernel-cpp17.cpp | 2 +- targettests/execution/auto_kernel.cpp | 2 +- test/AST-Quake/calling_convention.cpp | 335 ++++++++++++++++++ test/AST-Quake/vector_int-1.cpp | 25 +- test/Quake-QIR/return_values.qke | 188 +++------- test/Quake/return_vector.qke | 30 +- 11 files changed, 661 insertions(+), 441 deletions(-) create mode 100644 test/AST-Quake/calling_convention.cpp diff --git a/docker/build/assets.Dockerfile b/docker/build/assets.Dockerfile index f7d34e9268..73e20627ad 100644 --- a/docker/build/assets.Dockerfile +++ b/docker/build/assets.Dockerfile @@ -286,6 +286,7 @@ RUN cd /cuda-quantum && source scripts/configure_build.sh && \ # The tests is marked correctly as requiring nvcc, but since nvcc # is available during the build we need to filter it manually. filtered=" --filter-out MixedLanguage/cuda-1"; \ + filtered+="|AST-Quake/calling_convention"; \ fi && \ "$LLVM_INSTALL_PREFIX/bin/llvm-lit" -v build/test \ --param nvqpp_site_config=build/test/lit.site.cfg.py ${filtered} && \ diff --git a/include/cudaq/Optimizer/Builder/Factory.h b/include/cudaq/Optimizer/Builder/Factory.h index 868cf4c861..24e933117a 100644 --- a/include/cudaq/Optimizer/Builder/Factory.h +++ b/include/cudaq/Optimizer/Builder/Factory.h @@ -236,6 +236,10 @@ createMonotonicLoop(mlir::OpBuilder &builder, mlir::Location loc, bool hasHiddenSRet(mlir::FunctionType funcTy); +/// Check a function to see if argument 0 has the `sret` attribute. Typically, +/// one may find this on a host-side entry point function. +bool hasSRet(mlir::func::FuncOp funcOp); + /// Convert the function type \p funcTy to a signature compatible with the code /// on the host side. 
This will add hidden arguments, such as the `this` /// pointer, convert some results to `sret` pointers, etc. @@ -251,7 +255,8 @@ bool isX86_64(mlir::ModuleOp); bool isAArch64(mlir::ModuleOp); /// A small structure may be passed as two arguments on the host side. (e.g., on -/// the X86-64 ABI.) If \p ty is not a `struct`, this returns `false`. +/// the X86-64 ABI.) If \p ty is not a `struct`, this returns `false`. Note +/// also, some small structs may be packed into a single register. bool structUsesTwoArguments(mlir::Type ty); std::optional getIntIfConstant(mlir::Value value); diff --git a/lib/Optimizer/Builder/Factory.cpp b/lib/Optimizer/Builder/Factory.cpp index 1c14ec349a..73b66cdac3 100644 --- a/lib/Optimizer/Builder/Factory.cpp +++ b/lib/Optimizer/Builder/Factory.cpp @@ -18,6 +18,9 @@ using namespace mlir; namespace cudaq::opt { +// The common small struct limit for architectures cudaq is supporting. +static constexpr unsigned CommonSmallStructSize = 128; + bool factory::isX86_64(ModuleOp module) { std::string triple; if (auto ta = module->getAttr(targetTripleAttrName)) @@ -302,33 +305,6 @@ cc::LoopOp factory::createMonotonicLoop( return loop; } -// FIXME: some ABIs may return a small struct in registers rather than via an -// sret pointer. -// -// On x86_64, -// pair of: argument return value packed from msb to lsb -// i32 : i64 i64 (second, first) -// i64 : i64, i64 { i64, i64 } -// f32 : <2 x float> <2 x float> -// f64 : double, double { double, double } -// -// On aarch64, -// pair of: argument return value packed from msb to lsb -// i32 : i64 i64 (second, first) -// i64 : [2 x i64] [2 x i64] -// f32 : [2 x float] { float, float } -// f64 : [2 x double] { double, double } -bool factory::hasHiddenSRet(FunctionType funcTy) { - // If a function has more than 1 result, the results are promoted to a - // structured return argument. Otherwise, if there is 1 result and it is an - // aggregate type, then it is promoted to a structured return argument. 
- auto numResults = funcTy.getNumResults(); - return numResults > 1 || - (numResults == 1 && funcTy.getResult(0) - .isa()); -} - cc::StructType factory::stlStringType(MLIRContext *ctx) { auto i8Ty = IntegerType::get(ctx, 8); auto ptrI8Ty = cc::PointerType::get(i8Ty); @@ -361,8 +337,8 @@ Type factory::getSRetElementType(FunctionType funcTy) { auto *ctx = funcTy.getContext(); if (funcTy.getNumResults() > 1) return cc::StructType::get(ctx, funcTy.getResults()); - if (isa(funcTy.getResult(0))) - return getDynamicBufferType(ctx); + if (auto spanTy = dyn_cast(funcTy.getResult(0))) + return stlVectorType(spanTy.getElementType()); return funcTy.getResult(0); } @@ -403,33 +379,49 @@ static Type convertToHostSideType(Type ty) { // function tries to simulate GCC argument passing conventions. classify() also // has a number of FIXME comments, where it diverges from the referenced ABI. // Empirical evidence show that on x86_64, integers and floats are packed in -// integers of size 32 or 64 together, unless the float member fits by itself. +// integers of size 8, 16, 24, 32 or 64 together, unless the float member fits +// by itself. static bool shouldExpand(SmallVectorImpl &packedTys, cc::StructType structTy) { if (structTy.isEmpty()) return false; auto *ctx = structTy.getContext(); unsigned bits = 0; + auto scaleBits = [&](unsigned size) { + if (size < 32) + size = (size + 7) & ~7u; + if (size > 32 && size <= 64) + size = 64; + return size; + }; // First split the members into a "lo" set and a "hi" set. 
SmallVector set1; SmallVector set2; for (auto ty : structTy.getMembers()) { if (auto intTy = dyn_cast(ty)) { - bits += intTy.getWidth(); - if (bits <= 64) + auto addBits = scaleBits(intTy.getWidth()); + if (bits + addBits <= 64) { + bits += addBits; set1.push_back(ty); - else + } else { + bits = std::max(bits, 64u) + addBits; set2.push_back(ty); + } } else if (auto fltTy = dyn_cast(ty)) { - bits += fltTy.getWidth(); - if (bits <= 64) + auto addBits = fltTy.getWidth(); + if (bits + addBits <= 64) { + bits += addBits; set1.push_back(ty); - else + } else { + bits = std::max(bits, 64u) + addBits; set2.push_back(ty); + } } else { return false; } + if (bits > CommonSmallStructSize) + return false; } // Process the sets. If the set has anything integral, use integer. If the set @@ -441,12 +433,23 @@ static bool shouldExpand(SmallVectorImpl &packedTys, return true; return false; }; + auto intSetSize = [&](auto theSet) { + unsigned size = 0; + for (auto ty : theSet) + size += scaleBits(ty.getIntOrFloatBitWidth()); + return size; + }; auto processMembers = [&](auto theSet, unsigned packIdx) { if (useInt(theSet)) { - packedTys[packIdx] = IntegerType::get(ctx, bits > 32 ? 64 : 32); + auto size = intSetSize(theSet); + if (size <= 32) + packedTys[packIdx] = IntegerType::get(ctx, size); + else + packedTys[packIdx] = IntegerType::get(ctx, 64); } else if (theSet.size() == 1) { packedTys[packIdx] = theSet[0]; } else { + assert(theSet[0] == FloatType::getF32(ctx) && "must be float"); packedTys[packIdx] = VectorType::get(ArrayRef{2}, theSet[0]); } @@ -454,15 +457,59 @@ static bool shouldExpand(SmallVectorImpl &packedTys, assert(!set1.empty() && "struct must have members"); packedTys.resize(set2.empty() ? 
1 : 2); processMembers(set1, 0); - if (!set2.empty()) - processMembers(set2, 1); + if (set2.empty()) + return false; + processMembers(set2, 1); return true; } +bool factory::hasSRet(func::FuncOp funcOp) { + if (funcOp.getNumArguments() > 0) + if (auto dict = funcOp.getArgAttrDict(0)) + return dict.contains(LLVM::LLVMDialect::getStructRetAttrName()); + return false; +} + +// On x86_64, +// pair of: argument return value packed from msb to lsb +// i32 : i64 i64 (second, first) +// i64 : i64, i64 { i64, i64 } +// f32 : <2 x float> <2 x float> +// f64 : double, double { double, double } +// ptr : ptr, ptr { ptr, ptr } +// +// On aarch64, +// pair of: argument return value packed from msb to lsb +// i32 : i64 i64 (second, first) +// i64 : [2 x i64] [2 x i64] +// f32 : [2 x float] { float, float } +// f64 : [2 x double] { double, double } +// ptr : [2 x i64] [2 x i64] +bool factory::hasHiddenSRet(FunctionType funcTy) { + // If a function has more than 1 result, the results are promoted to a + // structured return argument. Otherwise, if there is 1 result and it is an + // aggregate type, then it is promoted to a structured return argument. + auto numResults = funcTy.getNumResults(); + if (numResults == 0) + return false; + if (numResults > 1) + return true; + auto resTy = funcTy.getResult(0); + if (resTy.isa()) + return true; + if (auto strTy = dyn_cast(resTy)) { + SmallVector packedTys; + bool inRegisters = shouldExpand(packedTys, strTy) || !packedTys.empty(); + return !inRegisters; + } + return false; +} + bool factory::structUsesTwoArguments(mlir::Type ty) { // Unchecked! This is only valid if target is X86-64. 
auto structTy = dyn_cast(ty); - if (!structTy || structTy.getBitSize() == 0 || structTy.getBitSize() > 128) + if (!structTy || structTy.getBitSize() == 0 || + structTy.getBitSize() > CommonSmallStructSize) return false; SmallVector unused; return shouldExpand(unused, structTy); @@ -486,14 +533,32 @@ FunctionType factory::toHostSideFuncType(FunctionType funcTy, bool addThisPtr, auto *ctx = funcTy.getContext(); SmallVector inputTys; bool hasSRet = false; - if (factory::hasHiddenSRet(funcTy)) { - // When the kernel is returning a std::vector result, the result is - // returned via a sret argument in the first position. When this argument - // is added, the this pointer becomes the second argument. Both are opaque - // pointers at this point. - auto eleTy = convertToHostSideType(getSRetElementType(funcTy)); - inputTys.push_back(cc::PointerType::get(eleTy)); - hasSRet = true; + Type resultTy; + if (funcTy.getNumResults() == 1) + if (auto strTy = dyn_cast(funcTy.getResult(0))) + if (strTy.getBitSize() != 0 && + strTy.getBitSize() <= CommonSmallStructSize) { + SmallVector packedTys; + if (shouldExpand(packedTys, strTy) || !packedTys.empty()) { + if (packedTys.size() == 1) + resultTy = packedTys[0]; + else + resultTy = cc::StructType::get(ctx, packedTys); + } + } + if (!resultTy && funcTy.getNumResults()) { + if (factory::hasHiddenSRet(funcTy)) { + // When the kernel is returning a std::vector result, the result is + // returned via a sret argument in the first position. When this argument + // is added, the this pointer becomes the second argument. Both are opaque + // pointers at this point. + auto eleTy = convertToHostSideType(getSRetElementType(funcTy)); + inputTys.push_back(cc::PointerType::get(eleTy)); + hasSRet = true; + } else { + assert(funcTy.getNumResults() == 1); + resultTy = funcTy.getResult(0); + } } // If this kernel is a plain old function or a static member function, we // don't want to add a hidden `this` argument. 
@@ -509,20 +574,25 @@ FunctionType factory::toHostSideFuncType(FunctionType funcTy, bool addThisPtr, // On x86_64 and aarch64, a struct that is smaller than 128 bits may be // passed in registers as separate arguments. See classifyArgumentType() // in CodeGen/TargetInfo.cpp. - if (strTy.getBitSize() != 0 && strTy.getBitSize() <= 128) { + if (strTy.getBitSize() != 0 && + strTy.getBitSize() <= CommonSmallStructSize) { if (isX86_64(module)) { SmallVector packedTys; if (shouldExpand(packedTys, strTy)) { for (auto ty : packedTys) inputTys.push_back(ty); continue; + } else if (!packedTys.empty()) { + for (auto ty : packedTys) + inputTys.push_back(ty); + continue; } } else { assert(isAArch64(module) && "aarch64 expected"); if (onlyArithmeticMembers(strTy)) { // Empirical evidence shows that on aarch64, arguments are packed - // into a single i64 or a [2 x i64] typed value based on the size of - // the struct. This is regardless of whether the value(s) are + // into a single i64 or a [2 x i64] typed value based on the size + // of the struct. This is regardless of whether the value(s) are // floating-point or not. if (strTy.getBitSize() > 64) inputTys.push_back(cc::ArrayType::get(ctx, i64Ty, 2)); @@ -542,8 +612,8 @@ FunctionType factory::toHostSideFuncType(FunctionType funcTy, bool addThisPtr, // and it hasn't been converted to a hidden sret argument. 
if (funcTy.getNumResults() == 0 || hasSRet) return FunctionType::get(ctx, inputTys, {}); - assert(funcTy.getNumResults() == 1); - return FunctionType::get(ctx, inputTys, funcTy.getResults()); + assert(funcTy.getNumResults() == 1 && resultTy); + return FunctionType::get(ctx, inputTys, resultTy); } bool factory::isStdVecArg(Type type) { diff --git a/lib/Optimizer/Transforms/GenKernelExecution.cpp b/lib/Optimizer/Transforms/GenKernelExecution.cpp index 7d693921f1..a4667ce7b5 100644 --- a/lib/Optimizer/Transforms/GenKernelExecution.cpp +++ b/lib/Optimizer/Transforms/GenKernelExecution.cpp @@ -251,7 +251,7 @@ class GenerateKernelExecution builder, loc, cudaq::cc::PointerType::get(i8Ty), fromBuff); builder.create( loc, std::nullopt, cudaq::llvmMemCopyIntrinsic, - SmallVector{outputBuffer, vecFromBuff, bytes, notVolatile}); + ValueRange{outputBuffer, vecFromBuff, bytes, notVolatile}); auto i8ArrTy = cudaq::cc::PointerType::get(cudaq::cc::ArrayType::get(i8Ty)); auto buf1 = cudaq::opt::factory::createCast(builder, loc, i8ArrTy, outputBuffer); @@ -538,80 +538,6 @@ class GenerateKernelExecution return argsCreatorFunc; } - /// If the kernel has an sret argument, then we rewrite the kernel's signature - /// on the target. Note that this requires that the target has the ability to - /// pass stack pointers as function arguments. These stack pointers will - /// obviously only necessarily be valid to the target executing the kernel. - void updateQPUKernelAsSRet(OpBuilder &builder, func::FuncOp funcOp, - FunctionType newFuncTy) { - auto funcTy = funcOp.getFunctionType(); - // We add exactly 1 sret argument regardless of how many fields are folded - // into it. 
- assert(newFuncTy.getNumInputs() == funcTy.getNumInputs() + 1 && - "sret should be a single argument"); - auto *ctx = funcOp.getContext(); - auto eleTy = cudaq::opt::factory::getSRetElementType(funcTy); - NamedAttrList attrs; - attrs.set(LLVM::LLVMDialect::getStructRetAttrName(), TypeAttr::get(eleTy)); - funcOp.insertArgument(0, newFuncTy.getInput(0), attrs.getDictionary(ctx), - funcOp.getLoc()); - auto elePtrTy = cudaq::cc::PointerType::get(eleTy); - OpBuilder::InsertionGuard guard(builder); - SmallVector returnsToErase; - // Update all func.return to store values to the sret block. - funcOp->walk([&](func::ReturnOp retOp) { - auto loc = retOp.getLoc(); - builder.setInsertionPoint(retOp); - auto cast = builder.create(loc, elePtrTy, - funcOp.getArgument(0)); - if (funcOp.getNumResults() > 1) { - for (int i = 0, end = funcOp.getNumResults(); i != end; ++i) { - auto mem = builder.create( - loc, cudaq::cc::PointerType::get(funcTy.getResult(i)), cast, - SmallVector{i}); - builder.create(loc, retOp.getOperands()[i], mem); - } - } else if (auto stdvecTy = - dyn_cast(funcTy.getResult(0))) { - auto stdvec = retOp.getOperands()[0]; - auto eleTy = [&]() -> Type { - // TODO: Fold this conversion into the StdvecDataOp builder. We will - // never get a data buffer which is not byte addressable and where - // the width is less than 8. 
- if (auto intTy = dyn_cast(stdvecTy.getElementType())) - if (intTy.getWidth() < 8) - return builder.getI8Type(); - return stdvecTy.getElementType(); - }(); - auto i8Ty = cudaq::cc::PointerType::get(builder.getI8Type()); - auto ptrTy = cudaq::cc::PointerType::get(eleTy); - auto data = builder.create(loc, ptrTy, stdvec); - auto mem0 = builder.create( - loc, cudaq::cc::PointerType::get(i8Ty), cast, - SmallVector{0}); - auto mem1 = builder.create( - loc, cudaq::cc::PointerType::get(ptrTy), mem0); - builder.create(loc, data, mem1); - auto i64Ty = builder.getI64Type(); - auto size = builder.create(loc, i64Ty, stdvec); - auto mem2 = builder.create( - loc, cudaq::cc::PointerType::get(i64Ty), cast, - SmallVector{1}); - builder.create(loc, size, mem2); - } else { - builder.create(loc, retOp.getOperands()[0], cast); - } - builder.create(loc); - returnsToErase.push_back(retOp); - }); - for (auto *op : returnsToErase) - op->erase(); - for (std::size_t i = 0, end = funcOp.getNumResults(); i != end; ++i) - funcOp.eraseResult(0); - modifiedDevKernels.insert( - std::pair{funcOp.getName(), newFuncTy.getInput(0)}); - } - /// In the thunk, we need to unpack any `std::vector` objects encoded in the /// packet. Since these have dynamic size, they are encoded as trailing bytes /// by offset and size. The offset is implicit from the values of the @@ -821,58 +747,23 @@ class GenerateKernelExecution // Unpack the arguments in the struct and build the argument list for // the call to the kernel code. 
SmallVector args; - const bool hiddenSRet = cudaq::opt::factory::hasHiddenSRet(funcTy); - FunctionType newFuncTy = [&]() { - if (hiddenSRet) { - auto sretPtrTy = cudaq::cc::PointerType::get( - cudaq::opt::factory::getSRetElementType(funcTy)); - SmallVector inputTys = {sretPtrTy}; - inputTys.append(funcTy.getInputs().begin(), funcTy.getInputs().end()); - return FunctionType::get(ctx, inputTys, {}); - } - return funcTy; - }(); - int offset = funcTy.getNumInputs(); - if (hiddenSRet) { - // Use the end of the argument block for the return values. - auto eleTy = structTy.getMember(offset); - auto mem = builder.create( - loc, cudaq::cc::PointerType::get(eleTy), castOp, - SmallVector{offset}); - auto sretPtrTy = cudaq::cc::PointerType::get( - cudaq::opt::factory::getSRetElementType(funcTy)); - auto sretMem = builder.create(loc, sretPtrTy, mem); - args.push_back(sretMem); - - // Rewrite the original kernel's signature and return op(s). - updateQPUKernelAsSRet(builder, funcOp, newFuncTy); - } + const std::int32_t offset = funcTy.getNumInputs(); for (auto inp : llvm::enumerate(funcTy.getInputs())) { auto [a, t] = processInputValue(loc, builder, trailingData, val, inp.value(), inp.index(), structTy); trailingData = t; args.push_back(a); } - auto call = builder.create(loc, newFuncTy.getResults(), + auto call = builder.create(loc, funcTy.getResults(), funcOp.getName(), args); - // If and only if the kernel returns non-sret results, then take those - // values and store them in the results section of the struct. They will - // eventually be returned to the original caller. 
- if (!hiddenSRet && funcTy.getNumResults() == 1) { - auto eleTy = structTy.getMember(offset); - auto mem = builder.create( - loc, cudaq::cc::PointerType::get(eleTy), castOp, - SmallVector{offset}); - builder.create(loc, call.getResult(0), mem); - } - - // If the original result was a std::vector, then depending on whether - // this is client-server or not, the thunk function packs the dynamic return - // data into a message buffer or just returns a pointer to the shared heap - // allocation, resp. - bool hasVectorResult = funcTy.getNumResults() == 1 && - isa(funcTy.getResult(0)); + const bool hasVectorResult = + funcTy.getNumResults() == 1 && + isa(funcTy.getResult(0)); if (hasVectorResult) { + // If the original result was a std::vector, then depending on whether + // this is client-server or not, the thunk function packs the dynamic + // return data into a message buffer or just returns a pointer to the + // shared heap allocation, resp. auto *currentBlock = builder.getBlock(); auto *reg = currentBlock->getParent(); auto *thenBlock = builder.createBlock(reg); @@ -881,23 +772,53 @@ class GenerateKernelExecution builder.create(loc, isClientServer, thenBlock, elseBlock); builder.setInsertionPointToEnd(thenBlock); - int offset = funcTy.getNumInputs(); auto gepRes = builder.create( loc, cudaq::cc::PointerType::get(structTy.getMember(offset)), castOp, - SmallVector{offset}); - auto gepRes2 = builder.create( + ArrayRef{offset}); + auto resAsVec = builder.create( + loc, cudaq::cc::PointerType::get(funcTy.getResult(0)), gepRes); + builder.create(loc, call.getResult(0), resAsVec); + auto resAsArg = builder.create( loc, cudaq::cc::PointerType::get(thunkTy.getResults()[0]), gepRes); // createDynamicResult packs the input values and the dynamic results // into a single buffer to pass back as a message. 
auto res = builder.create( loc, thunkTy.getResults()[0], "__nvqpp_createDynamicResult", - ValueRange{thunkEntry->getArgument(0), structSize, gepRes2}); + ValueRange{thunkEntry->getArgument(0), structSize, resAsArg}); builder.create(loc, res.getResult(0)); builder.setInsertionPointToEnd(elseBlock); + auto eleTy = structTy.getMember(offset); + auto memTy = cudaq::cc::PointerType::get(eleTy); + auto mem = builder.create( + loc, memTy, castOp, SmallVector{offset}); + auto resPtrTy = cudaq::cc::PointerType::get(call.getResult(0).getType()); + auto castMem = builder.create(loc, resPtrTy, mem); + builder.create(loc, call.getResult(0), castMem); + } else { + // FIXME: Should check for recursive vector case. + // If the kernel returns non-dynamic results (no spans), then take those + // values and store them in the results section of the struct. They will + // eventually be returned to the original caller. + if (funcTy.getNumResults()) { + for (std::int32_t o = 0; + o < static_cast(funcTy.getNumResults()); ++o) { + auto eleTy = structTy.getMember(offset + o); + auto memTy = cudaq::cc::PointerType::get(eleTy); + auto mem = builder.create( + loc, memTy, castOp, + SmallVector{offset + o}); + auto resTy = call.getResult(o).getType(); + auto resPtrTy = cudaq::cc::PointerType::get(resTy); + Value castMem = mem; + if (resPtrTy != mem.getType()) + castMem = builder.create(loc, resPtrTy, mem); + builder.create(loc, call.getResult(o), castMem); + } + } } // zeroDynamicResult is used by models other than client-server. It assumes - // that no messages need to be sent, the CPU and QPU code share a memory - // space, and therefore skips making any copies. + // that no messages need to be sent and that the CPU and QPU code share a + // memory space. Therefore, making any copies can be skipped. 
auto zeroRes = builder.create(loc, thunkTy.getResults()[0], "__nvqpp_zeroDynamicResult", ValueRange{}); @@ -1125,11 +1046,10 @@ class GenerateKernelExecution func::FuncOp thunkFunc) { auto *ctx = builder.getContext(); auto i64Ty = builder.getI64Type(); - auto offset = devFuncTy.getNumInputs(); + std::int32_t offset = devFuncTy.getNumInputs(); auto thunkTy = getThunkType(ctx); auto structPtrTy = cudaq::cc::PointerType::get(structTy); Block *hostFuncEntryBlock = hostFunc.addEntryBlock(); - const bool hiddenSRet = cudaq::opt::factory::hasHiddenSRet(devFuncTy); OpBuilder::InsertionGuard guard(builder); builder.setInsertionPointToStart(hostFuncEntryBlock); @@ -1170,7 +1090,7 @@ class GenerateKernelExecution // launch kernel. if (isa(quakeTy)) { auto kernKey = builder.create( - loc, builder.getI64Type(), cudaq::runtime::getLinkableKernelKey, + loc, i64Ty, cudaq::runtime::getLinkableKernelKey, ValueRange{arg}); stVal = builder.create( loc, stVal.getType(), stVal, kernKey.getResult(0), idx); @@ -1308,8 +1228,8 @@ class GenerateKernelExecution std::int32_t idx = inp.index(); Type quakeTy = devFuncTy.getInput(idx); if (auto stdvecTy = dyn_cast(quakeTy)) { - auto bytes = builder.create( - loc, builder.getI64Type(), stVal, idx); + auto bytes = builder.create(loc, i64Ty, + stVal, idx); assert(stdvecTy == devFuncTy.getInput(idx)); auto ptrInTy = cast(inTy); vecToBuffer = encodeVectorData(loc, builder, bytes, stdvecTy, arg, @@ -1351,7 +1271,6 @@ class GenerateKernelExecution loc, cudaq::opt::factory::stlVectorType(ptrI8Ty)); auto arrPtrTy = cudaq::cc::ArrayType::get(ctx, ptrI8Ty, count); Value buffer = builder.create(loc, arrPtrTy); - auto i64Ty = builder.getI64Type(); auto buffSize = builder.create(loc, i64Ty, arrPtrTy); auto ptrPtrTy = cudaq::cc::PointerType::get(ptrI8Ty); auto cast1 = builder.create(loc, ptrPtrTy, buffer); @@ -1458,61 +1377,76 @@ class GenerateKernelExecution // result value(s) from the struct returned by `launchKernel` and return // them to our caller. 
SmallVector results; - const bool multiResult = devFuncTy.getResults().size() > 1; - for (auto res : llvm::enumerate(devFuncTy.getResults())) { - int off = res.index() + offset; - if (auto vecTy = dyn_cast(res.value())) { - auto eleTy = vecTy.getElementType(); - auto ptrTy = cudaq::cc::PointerType::get(eleTy); - auto gep0 = builder.create( - loc, cudaq::cc::PointerType::get(ptrTy), temp, - SmallVector{0, off, 0}); - auto dataPtr = builder.create(loc, gep0); - auto lenPtrTy = cudaq::cc::PointerType::get(builder.getI64Type()); - auto gep1 = builder.create( - loc, lenPtrTy, temp, - SmallVector{0, off, 1}); - auto vecLen = builder.create(loc, gep1); - if (vecTy.getElementType() == builder.getI1Type()) { - genStdvecBoolFromInitList(loc, builder, - hostFuncEntryBlock->getArguments().front(), - dataPtr, vecLen); - } else { - cudaq::IRBuilder irBuilder(builder); - Value tSize = irBuilder.getByteSizeOfType(loc, eleTy); - if (!tSize) { - TODO_loc(loc, "unhandled vector element type"); - return; - } - genStdvecTFromInitList(loc, builder, - hostFuncEntryBlock->getArguments().front(), - dataPtr, tSize, vecLen); - } - offset++; + auto hostFuncTy = hostFunc.getFunctionType(); + assert((hostFuncTy.getResults().empty() || + (hostFuncTy.getNumResults() == 1)) && + "C++ function expected to have 0 or 1 return value"); + const bool resultVal = !hostFuncTy.getResults().empty(); + if (resultVal || cudaq::opt::factory::hasSRet(hostFunc)) { + // Host function returns a value. Either returning by value or via an sret + // reference. 
+ if (resultVal) { + Type res0Ty = structTy.getMember(offset); + auto ptrResTy = cudaq::cc::PointerType::get(res0Ty); + auto resPtr = builder.create( + loc, ptrResTy, temp, ArrayRef{0, offset}); + Type castToTy = cudaq::cc::PointerType::get(hostFuncTy.getResult(0)); + auto castResPtr = [&]() -> Value { + if (castToTy == ptrResTy) + return resPtr; + return builder.create(loc, castToTy, resPtr); + }(); + results.push_back(builder.create(loc, castResPtr)); } else { - auto gep0 = builder.create( - loc, cudaq::cc::PointerType::get(structTy.getMember(off)), temp, - SmallVector{0, off}); - auto gep = cudaq::opt::factory::createCast( - builder, loc, cudaq::cc::PointerType::get(res.value()), gep0); - Value loadVal = builder.create(loc, gep); - if (hiddenSRet) { - auto sretPtr = [&]() -> Value { - if (multiResult) - return builder.create( - loc, cudaq::cc::PointerType::get(res.value()), - hostFuncEntryBlock->getArguments().front(), - SmallVector{off}); - return builder.create( - loc, cudaq::cc::PointerType::get(res.value()), - hostFuncEntryBlock->getArguments().front()); - }(); - builder.create(loc, loadVal, sretPtr); + // Check if device is returning a span. If it is, then we will need to + // convert it to a std::vector here. The vector is constructed in-place + // on the sret memory block. 
+ Value arg0 = hostFuncEntryBlock->getArguments().front(); + if (auto spanTy = + dyn_cast(devFuncTy.getResult(0))) { + auto eleTy = spanTy.getElementType(); + auto ptrTy = cudaq::cc::PointerType::get(eleTy); + auto gep0 = builder.create( + loc, cudaq::cc::PointerType::get(ptrTy), temp, + SmallVector{0, offset, 0}); + auto dataPtr = builder.create(loc, gep0); + auto lenPtrTy = cudaq::cc::PointerType::get(i64Ty); + auto gep1 = builder.create( + loc, lenPtrTy, temp, + SmallVector{0, offset, 1}); + auto vecLen = builder.create(loc, gep1); + if (spanTy.getElementType() == builder.getI1Type()) { + genStdvecBoolFromInitList(loc, builder, arg0, dataPtr, vecLen); + } else { + Value tSize = + builder.create(loc, i64Ty, eleTy); + genStdvecTFromInitList(loc, builder, arg0, dataPtr, tSize, vecLen); + } } else { - results.push_back(loadVal); + // Otherwise, we can just copy the aggregate into the sret memory + // block. Uses the size of the host function's sret pointer element + // type for the memcpy, so the device should return an (aggregate) + // value of suitable size. + Type res0Ty = structTy.getMember(offset); + auto ptrResTy = cudaq::cc::PointerType::get(res0Ty); + auto resPtr = builder.create( + loc, ptrResTy, temp, + ArrayRef{0, offset}); + auto castMsgBuff = + builder.create(loc, ptrI8Ty, resPtr); + Type eleTy = + cast(arg0.getType()).getElementType(); + Value bytes = builder.create(loc, i64Ty, eleTy); + auto notVolatile = builder.create(loc, 0, 1); + auto castArg0 = builder.create(loc, ptrI8Ty, arg0); + builder.create( + loc, std::nullopt, cudaq::llvmMemCopyIntrinsic, + ValueRange{castArg0, castMsgBuff, bytes, notVolatile}); } } } + + // Return the result (if any). 
builder.create(loc, results); } @@ -1779,45 +1713,11 @@ class GenerateKernelExecution cudaq::opt::factory::createGlobalCtorCall( module, FlatSymbolRefAttr::get(ctx, initFun.getName())); - SmallVector deadCalls; - module.walk([&](func::CallOp call) { - if (!call.getResults().empty()) { - auto callee = call.getCallee(); - auto iter = modifiedDevKernels.find(callee); - if (iter != modifiedDevKernels.end()) { - OpBuilder builder(call); - Type ty = call.getResult(0).getType(); - auto loc = call.getLoc(); - auto strTy = cast( - cast(iter->second).getElementType()); - auto buff = builder.create(loc, strTy); - SmallVector args = {buff}; - args.append(call.getOperands().begin(), call.getOperands().end()); - builder.create(loc, TypeRange{}, callee, args); - auto buffPtrPtr = builder.create( - loc, cudaq::cc::PointerType::get(strTy.getMember(0)), buff, - ArrayRef{0}); - auto buffPtr = builder.create(loc, buffPtrPtr); - auto buffSizePtr = builder.create( - loc, cudaq::cc::PointerType::get(strTy.getMember(1)), buff, - ArrayRef{1}); - auto buffSize = builder.create(loc, buffSizePtr); - auto sv = builder.create(loc, ty, buffPtr, - buffSize); - call.getResult(0).replaceAllUsesWith(sv); - deadCalls.push_back(call); - } - } - }); - for (auto *op : deadCalls) - op->erase(); - LLVM_DEBUG(llvm::dbgs() << "final module:\n" << module << '\n'); } out.keep(); } const DataLayout *dataLayout = nullptr; - DenseMap modifiedDevKernels; }; } // namespace diff --git a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp index ba4a87c29e..9328b78896 100644 --- a/lib/Optimizer/Transforms/LiftArrayAlloc.cpp +++ b/lib/Optimizer/Transforms/LiftArrayAlloc.cpp @@ -191,8 +191,10 @@ class AllocaPattern : public OpRewritePattern { toErase.push_back(user); } if (toGlobal) { - rewriter.setInsertionPointAfter(alloc); - rewriter.replaceOp(alloc, conGlobal); + if (conGlobal) { + rewriter.setInsertionPointAfter(alloc); + rewriter.replaceOp(alloc, conGlobal); + } } else { 
toErase.push_back(alloc); } diff --git a/targettests/execution/auto_kernel-cpp17.cpp b/targettests/execution/auto_kernel-cpp17.cpp index f3b2f3dc65..04b0353113 100644 --- a/targettests/execution/auto_kernel-cpp17.cpp +++ b/targettests/execution/auto_kernel-cpp17.cpp @@ -7,7 +7,7 @@ ******************************************************************************/ // REQUIRES: c++17 -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ -std=c++17 --enable-mlir %s -o %t && %t | FileCheck %s #include diff --git a/targettests/execution/auto_kernel.cpp b/targettests/execution/auto_kernel.cpp index f52b13a7f0..1aec262e2a 100644 --- a/targettests/execution/auto_kernel.cpp +++ b/targettests/execution/auto_kernel.cpp @@ -7,7 +7,7 @@ ******************************************************************************/ // REQUIRES: c++20 -// RUN: nvq++ %cpp_std --enable-mlir %s -o %t && %t | FileCheck %s +// RUN: nvq++ --enable-mlir %s -o %t && %t | FileCheck %s #include diff --git a/test/AST-Quake/calling_convention.cpp b/test/AST-Quake/calling_convention.cpp new file mode 100644 index 0000000000..3d2c6e2e4a --- /dev/null +++ b/test/AST-Quake/calling_convention.cpp @@ -0,0 +1,335 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// This test is only valid for x86_64. +// RUN: if [ `uname -m` = "x86_64" ] ; then \ +// RUN: cudaq-quake %cpp_std %s | cudaq-opt | FileCheck %s ; fi + +#include +#include +#include + +// Tests the host-side signatures of various spec supported kernel arguments and +// results. This file tests the x86_64 calling convention. 
Other architectures +// differ in their calling conventions. + +//===----------------------------------------------------------------------===// +// test all the basic arithmetic types to deny any regressions. + +struct T0 { + void operator()() __qpu__ {} +}; + +struct T1 { + void operator()(double arg) __qpu__ {} +}; + +struct T2 { + void operator()(float arg) __qpu__ {} +}; + +struct T3 { + void operator()(long long arg) __qpu__ {} +}; + +struct T4 { + void operator()(long arg) __qpu__ {} +}; + +struct T5 { + void operator()(int arg) __qpu__ {} +}; + +struct T6 { + void operator()(short arg) __qpu__ {} +}; + +struct T7 { + void operator()(char arg) __qpu__ {} +}; + +struct T8 { + void operator()(bool arg) __qpu__ {} +}; + +// CHECK-LABEL: func.func @_ZN2T0clEv( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr) { +// CHECK-LABEL: func.func @_ZN2T1clEd( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: f64) { +// CHECK-LABEL: func.func @_ZN2T2clEf( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: f32) { +// CHECK-LABEL: func.func @_ZN2T3clEx( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i64) { +// CHECK-LABEL: func.func @_ZN2T4clEl( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i64) { +// CHECK-LABEL: func.func @_ZN2T5clEi( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i32) { +// CHECK-LABEL: func.func @_ZN2T6clEs( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i16) { +// CHECK-LABEL: func.func @_ZN2T7clEc( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i8) { +// CHECK-LABEL: func.func @_ZN2T8clEb( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i1) { + +struct R0 { + void operator()() __qpu__ {} +}; + +struct R1 { + double operator()() __qpu__ { return {}; } +}; + +struct R2 { + float operator()() __qpu__ { return {}; } +}; + +struct R3 { + long long operator()() __qpu__ { return {}; } +}; + +struct R4 { + long operator()() __qpu__ { return {}; } +}; + +struct R5 { + int operator()() __qpu__ { return {}; } +}; + 
+struct R6 { + short operator()() __qpu__ { return {}; } +}; + +struct R7 { + char operator()() __qpu__ { return {}; } +}; + +struct R8 { + bool operator()() __qpu__ { return {}; } +}; + +// CHECK-LABEL: func.func @_ZN2R0clEv(%arg0: !cc.ptr) { +// CHECK-LABEL: func.func @_ZN2R1clEv(%arg0: !cc.ptr) -> f64 { +// CHECK-LABEL: func.func @_ZN2R2clEv(%arg0: !cc.ptr) -> f32 { +// CHECK-LABEL: func.func @_ZN2R3clEv(%arg0: !cc.ptr) -> i64 { +// CHECK-LABEL: func.func @_ZN2R4clEv(%arg0: !cc.ptr) -> i64 { +// CHECK-LABEL: func.func @_ZN2R5clEv(%arg0: !cc.ptr) -> i32 { +// CHECK-LABEL: func.func @_ZN2R6clEv(%arg0: !cc.ptr) -> i16 { +// CHECK-LABEL: func.func @_ZN2R7clEv(%arg0: !cc.ptr) -> i8 { +// CHECK-LABEL: func.func @_ZN2R8clEv(%arg0: !cc.ptr) -> i1 { + +//===----------------------------------------------------------------------===// +// structs that are less than 128 bits. +// arguments may be merged into 1 register or passed in pair of registers. +// results are returned in registers. + +struct G0 { + std::pair operator()(std::pair) __qpu__ { + return {}; + } +}; + +struct G1 { + std::pair operator()(std::pair) __qpu__ { + return {}; + } +}; + +struct G2 { + std::pair operator()(std::pair, + std::pair) __qpu__ { + return {}; + } +}; + +struct G3 { + std::pair operator()(std::pair) __qpu__ { + return {}; + } +}; + +struct BB { + bool _1; + bool _2; + bool _3; +}; + +BB glue0(); + +struct G4 { + std::pair operator()(BB) __qpu__ { return {}; } +}; + +struct II { + int _1; + int _2; + int _3; +}; + +II glue1(); + +struct G5 { + std::pair operator()(II) __qpu__ { return {}; } +}; + +struct CC { + char _1; + unsigned char _2; + signed char _3; +}; + +CC glue2(); + +struct G6 { + std::pair operator()(CC) __qpu__ { return {}; } +}; + +struct G7 { + BB operator()(BB, II, CC) __qpu__ { return glue0(); } +}; + +struct G8 { + II operator()(II, CC, BB) __qpu__ { return glue1(); } +}; + +struct G9 { + CC operator()(CC, BB, II) __qpu__ { return glue2(); } +}; + +// clang-format off +// 
CHECK-LABEL: func.func @_ZN2G0clESt4pairIddE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: f64, +// CHECK-SAME: %[[VAL_2:.*]]: f64) -> i16 +// CHECK-LABEL: func.func @_ZN2G1clESt4pairIffE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: vector<2xf32>) +// CHECK-SAME: -> i16 +// CHECK-LABEL: func.func @_ZN2G2clESt4pairIllES0_IidE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_1:.*]]: i64, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32, +// CHECK-SAME: %[[VAL_4:.*]]: f64) -> i24 +// CHECK-LABEL: func.func @_ZN2G3clESt4pairIdbE( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: f64, +// CHECK-SAME: %[[VAL_3:.*]]: i8) -> i32 +// CHECK-LABEL: func.func @_ZN2G4clE2BB( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: i24) -> i64 +// CHECK-LABEL: func.func @_ZN2G5clE2II( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: i32) -> !cc.struct<{i64, f32}> +// CHECK-LABEL: func.func @_ZN2G6clE2CC( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: i24) -> !cc.struct<{i64, i64}> +// CHECK-LABEL: func.func @_ZN2G7clE2BB2II2CC( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: i24, +// CHECK-SAME: %[[VAL_3:.*]]: i64, %[[VAL_4:.*]]: i32, +// CHECK-SAME: %[[VAL_5:.*]]: i24) -> i24 +// CHECK-LABEL: func.func @_ZN2G8clE2II2CC2BB( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: i64, +// CHECK-SAME: %[[VAL_3:.*]]: i32, %[[VAL_4:.*]]: i24, +// CHECK-SAME: %[[VAL_5:.*]]: i24) -> !cc.struct<{i64, i32}> +// CHECK-LABEL: func.func @_ZN2G9clE2CC2BB2II( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_1:.*]]: i24, %[[VAL_2:.*]]: i24, +// CHECK-SAME: %[[VAL_3:.*]]: i64, %[[VAL_4:.*]]: i32) -> i24 +// clang-format on + +//===----------------------------------------------------------------------===// +// std::vector - these get converted to sret and byval ptrs on host side. 
+ +std::vector make_believe(); + +struct V0 { + std::vector operator()() __qpu__ { return make_believe(); } +}; + +std::vector make_coffee(); + +struct V1 { + std::vector operator()(std::vector) __qpu__ { + return make_coffee(); + } +}; + +std::vector> make_crazy(); + +struct V2 { + std::vector> operator()(std::vector, + std::vector) __qpu__ { + return make_crazy(); + } +}; + +struct V3 { + void operator()(std::vector, std::vector) __qpu__ {} +}; + +// clang-format off +// CHECK-LABEL: func.func @_ZN2V0clEv( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>> {llvm.sret = !cc.struct<{!cc.ptr, !cc.ptr, !cc.ptr}>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr) +// CHECK-LABEL: func.func @_ZN2V1clESt6vectorIdSaIdEE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>> {llvm.sret = !cc.struct<{!cc.ptr, !cc.ptr, !cc.ptr}>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>) +// CHECK-LABEL: func.func @_ZN2V2clESt6vectorIfSaIfEES0_IsSaIsEE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr>, !cc.ptr>, !cc.ptr>}>> {llvm.sret = !cc.struct<{!cc.ptr>, !cc.ptr>, !cc.ptr>}>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>, +// CHECK-SAME: %[[VAL_3:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>) +// CHECK-LABEL: func.func @_ZN2V3clESt6vectorIlSaIlEES0_IbSaIbEE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>) +// clang-format on + +//===----------------------------------------------------------------------===// +// structs that are more than 128 bits. These get converted to sret or byval +// ptrs on the host side. 
+ +struct B0 { + void operator()(std::tuple) __qpu__ {} +}; + +struct BG { + float _1[4]; + int _2[5]; +}; + +BG make_sausage(); + +struct B1 { + BG operator()() __qpu__ { return make_sausage(); } +}; + +std::tuple make_interesting(); + +struct B2 { + std::tuple operator()(BG) __qpu__ { + return make_interesting(); + } +}; + +struct BA { + bool _1[64]; +}; + +struct B3 { + BA operator()(BA arg) __qpu__ { return arg; } +}; + +// clang-format off +// CHECK-LABEL: func.func @_ZN2B0clESt5tupleIJdicfsEE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: !cc.ptr>) { +// CHECK-LABEL: func.func @_ZN2B1clEv( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, !cc.array} [288,4]>> {llvm.sret = !cc.struct<"BG" {!cc.array, !cc.array} [288,4]>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr) +// CHECK-LABEL: func.func @_ZN2B2clE2BG( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr> {llvm.sret = !cc.struct<{f64, f64, i16, f32, i8, i32}>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr, !cc.array} [288,4]>> {llvm.byval = !cc.struct<"BG" {!cc.array, !cc.array} [288,4]>}) +// CHECK-LABEL: func.func @_ZN2B3clE2BA( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr} [512,1]>> {llvm.sret = !cc.struct<"BA" {!cc.array} [512,1]>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr} [512,1]>> {llvm.byval = !cc.struct<"BA" {!cc.array} [512,1]>}) +// clang-format on diff --git a/test/AST-Quake/vector_int-1.cpp b/test/AST-Quake/vector_int-1.cpp index 3bdfae634f..a5a989f6bf 100644 --- a/test/AST-Quake/vector_int-1.cpp +++ b/test/AST-Quake/vector_int-1.cpp @@ -22,8 +22,7 @@ __qpu__ void touringLondon() { return; } -// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_doubleDeckerBus._Z15doubleDeckerBusv( -// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, i64}>> {llvm.sret = !cc.struct<{!cc.ptr, i64}>}) attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { +// CHECK-LABEL: func.func @__nvqpp__mlirgen__function_doubleDeckerBus._Z15doubleDeckerBusv() -> !cc.stdvec attributes 
{"cudaq-entrypoint", "cudaq-kernel", no_this} { // CHECK: %[[VAL_1:.*]] = arith.constant 2 : i64 // CHECK: %[[VAL_2:.*]] = arith.constant 4 : i64 // CHECK: %[[VAL_3:.*]] = arith.constant 2 : i32 @@ -33,29 +32,15 @@ __qpu__ void touringLondon() { // CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr>) -> !cc.ptr // CHECK: %[[VAL_7:.*]] = call @__nvqpp_vectorCopyCtor(%[[VAL_6]], %[[VAL_1]], %[[VAL_2]]) : (!cc.ptr, i64, i64) -> !cc.ptr // CHECK: %[[VAL_8:.*]] = cc.stdvec_init %[[VAL_7]], %[[VAL_1]] : (!cc.ptr, i64) -> !cc.stdvec -// CHECK: %[[VAL_9:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr, i64}>>) -> !cc.ptr, i64}>> -// CHECK: %[[VAL_10:.*]] = cc.stdvec_data %[[VAL_8]] : (!cc.stdvec) -> !cc.ptr -// CHECK: %[[VAL_11:.*]] = cc.compute_ptr %[[VAL_9]][0] : (!cc.ptr, i64}>>) -> !cc.ptr> -// CHECK: %[[VAL_12:.*]] = cc.cast %[[VAL_11]] : (!cc.ptr>) -> !cc.ptr> -// CHECK: cc.store %[[VAL_10]], %[[VAL_12]] : !cc.ptr> -// CHECK: %[[VAL_13:.*]] = cc.stdvec_size %[[VAL_8]] : (!cc.stdvec) -> i64 -// CHECK: %[[VAL_14:.*]] = cc.compute_ptr %[[VAL_9]][1] : (!cc.ptr, i64}>>) -> !cc.ptr -// CHECK: cc.store %[[VAL_13]], %[[VAL_14]] : !cc.ptr -// CHECK: return +// CHECK: return %[[VAL_8]] : !cc.stdvec // CHECK: } // CHECK-LABEL: func.func @__nvqpp__mlirgen__function_touringLondon._Z13touringLondonv() attributes {"cudaq-entrypoint", "cudaq-kernel", no_this} { -// CHECK: %[[VAL_0:.*]] = cc.alloca !cc.struct<{!cc.ptr, i64}> -// CHECK: call @__nvqpp__mlirgen__function_doubleDeckerBus._Z15doubleDeckerBusv(%[[VAL_0]]) : (!cc.ptr, i64}>>) -> () -// CHECK: %[[VAL_10:.*]] = cc.compute_ptr %[[VAL_0]][0] : (!cc.ptr, i64}>>) -> !cc.ptr> -// CHECK: %[[VAL_1:.*]] = cc.load %[[VAL_10]] : !cc.ptr> -// CHECK: %[[VAL_2:.*]] = cc.compute_ptr %[[VAL_0]][1] : (!cc.ptr, i64}>>) -> !cc.ptr -// CHECK: %[[VAL_3:.*]] = cc.load %[[VAL_2]] : !cc.ptr -// CHECK: %[[VAL_4:.*]] = cc.stdvec_init %[[VAL_1]], %[[VAL_3]] : (!cc.ptr, i64) -> !cc.stdvec -// CHECK: %[[VAL_5:.*]] = cc.stdvec_data %[[VAL_4]] : (!cc.stdvec) -> 
!cc.ptr> +// CHECK: %[[VAL_0:.*]] = call @__nvqpp__mlirgen__function_doubleDeckerBus._Z15doubleDeckerBusv() : () -> !cc.stdvec +// CHECK: %[[VAL_5:.*]] = cc.stdvec_data %[[VAL_0]] : (!cc.stdvec) -> !cc.ptr> // CHECK: %[[VAL_6:.*]] = cc.cast %[[VAL_5]] : (!cc.ptr>) -> !cc.ptr // CHECK: %[[VAL_7:.*]] = cc.load %[[VAL_6]] : !cc.ptr // CHECK: %[[VAL_8:.*]] = cc.cast signed %[[VAL_7]] : (i32) -> i64 -// CHECK: %[[VAL_9:.*]] = quake.alloca !quake.veq{{\[}}%[[VAL_8]] : i64] +// CHECK: %[[VAL_9:.*]] = quake.alloca !quake.veq[%[[VAL_8]] : i64] // CHECK: return // CHECK: } diff --git a/test/Quake-QIR/return_values.qke b/test/Quake-QIR/return_values.qke index a4fbfa7477..085b9fec97 100644 --- a/test/Quake-QIR/return_values.qke +++ b/test/Quake-QIR/return_values.qke @@ -6,20 +6,22 @@ // the terms of the Apache License 2.0 which accompanies this distribution. // // ========================================================================== // -// RUN: cudaq-opt --add-dealloc --kernel-execution=codegen=1 --canonicalize %s | \ -// RUN: cudaq-translate --convert-to=qir | FileCheck %s +// RUN: cudaq-opt --add-dealloc --kernel-execution=codegen=1 --canonicalize %s | cudaq-translate --convert-to=qir | FileCheck %s // NB: the mangled name map is required for the kernel-execution pass. +// QIR codegen requires the target triple. 
module attributes{ quake.mangled_name_map = { __nvqpp__mlirgen__test_0 = "test_0", __nvqpp__mlirgen__test_1 = "test_1", __nvqpp__mlirgen__test_2 = "test_2", __nvqpp__mlirgen__test_3 = "test_3", __nvqpp__mlirgen__test_4 = "test_4", - __nvqpp__mlirgen__test_5 = "test_5" }} { + __nvqpp__mlirgen__test_5 = "test_5" }, + llvm.triple = "x86_64-unknown-linux-gnu"} { func.func private @__nvqpp_vectorCopyCtor(%arg0: !cc.ptr , %arg1: i64 , %arg2: i64 ) -> !cc.ptr +// vector -> struct ptr sret func.func @__nvqpp__mlirgen__test_0(%arg0: i32) -> !cc.stdvec { %c1_i64 = arith.constant 1 : i64 %c1 = arith.constant 1 : i64 @@ -56,8 +58,8 @@ func.func @test_0(%1: !cc.ptr, !cc.ptr, !cc.ptr} return } -// CHECK-LABEL: define void @__nvqpp__mlirgen__test_0({ i8*, i64 }* nocapture writeonly sret({ i8*, i64 }) -// CHECK-SAME: %[[VAL_0:.*]], i32 %[[VAL_1:.*]]) {{.*}}{ +// CHECK-LABEL: define { i1*, i64 } @__nvqpp__mlirgen__test_0( +// CHECK-SAME: i32 %[[VAL_1:.*]]) {{.*}}{ // CHECK: %[[VAL_2:.*]] = sext i32 %[[VAL_1]] to i64 // CHECK: %[[VAL_3:.*]] = tail call %[[VAL_4:.*]]* @__quantum__rt__qubit_allocate_array(i64 %[[VAL_2]]) // CHECK: %[[VAL_5:.*]] = tail call i64 @__quantum__rt__array_get_size_1d(%[[VAL_4]]* %[[VAL_3]]) @@ -95,12 +97,11 @@ func.func @test_0(%1: !cc.ptr, !cc.ptr, !cc.ptr} // CHECK: ._crit_edge5: ; preds = %[[VAL_21]], %[[VAL_8]], %[[VAL_19]] // CHECK: %[[VAL_34:.*]] = phi i8* [ %[[VAL_10]], %[[VAL_8]] ], [ %[[VAL_20]], %[[VAL_19]] ], [ %[[VAL_20]], %[[VAL_21]] ] // CHECK: %[[VAL_35:.*]] = call i8* @__nvqpp_vectorCopyCtor(i8* nonnull %[[VAL_34]], i64 %[[VAL_5]], i64 1) -// CHECK: call void @__quantum__rt__qubit_release_array(%[[VAL_4]]* %[[VAL_3]]) -// CHECK: %[[VAL_36:.*]] = getelementptr inbounds { i8*, i64 }, { i8*, i64 }* %[[VAL_0]], i64 0, i32 0 -// CHECK: store i8* %[[VAL_35]], i8** %[[VAL_36]], align 8 -// CHECK: %[[VAL_37:.*]] = getelementptr { i8*, i64 }, { i8*, i64 }* %[[VAL_0]], i64 0, i32 1 -// CHECK: store i64 %[[VAL_5]], i64* %[[VAL_37]], align 8 -// CHECK: 
ret void +// CHECK: %[[VAL_36:.*]] = bitcast i8* %[[VAL_35]] to i1* +// CHECK: %[[VAL_37:.*]] = insertvalue { i1*, i64 } undef, i1* %[[VAL_36]], 0 +// CHECK: %[[VAL_38:.*]] = insertvalue { i1*, i64 } %[[VAL_37]], i64 %[[VAL_5]], 1 +// CHECK: call void @__quantum__rt__qubit_release_array(%Array* %[[VAL_3]]) +// CHECK: ret { i1*, i64 } %[[VAL_38]] // CHECK: } // CHECK-LABEL: define void @test_0({ i8*, i8*, i8* }* sret({ i8*, i8*, i8* }) @@ -120,6 +121,7 @@ func.func @test_0(%1: !cc.ptr, !cc.ptr, !cc.ptr} // CHECK: ret void // CHECK: } +// struct{bool, bool} -> i16 func.func @__nvqpp__mlirgen__test_1() -> !cc.struct<{i1, i1}> { %qubits = quake.alloca !quake.veq<2> %q0 = quake.extract_ref %qubits[0] : (!quake.veq<2>) -> !quake.ref @@ -136,12 +138,12 @@ func.func @__nvqpp__mlirgen__test_1() -> !cc.struct<{i1, i1}> { return %rv2 : !cc.struct<{i1, i1}> } -func.func @test_1(%1: !cc.ptr> {llvm.sret = !cc.struct<{i1, i1}>}, %this: !cc.ptr) { - return +func.func @test_1(%this: !cc.ptr) -> i16 { + %0 = cc.undef i16 + return %0 : i16 } -// CHECK-LABEL: define void @__nvqpp__mlirgen__test_1({ i1, i1 }* nocapture writeonly sret({ i1, i1 }) -// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ +// CHECK-LABEL: define { i1, i1 } @__nvqpp__mlirgen__test_1() // CHECK: %[[VAL_1:.*]] = tail call %[[VAL_2:.*]]* @__quantum__rt__qubit_allocate_array(i64 2) // CHECK: %[[VAL_3:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_2]]* %[[VAL_1]], i64 0) // CHECK: %[[VAL_4:.*]] = bitcast i8* %[[VAL_3]] to %[[VAL_5:.*]]** @@ -152,37 +154,27 @@ func.func @test_1(%1: !cc.ptr> {llvm.sret = !cc.struct<{i1, // CHECK: tail call void @__quantum__qis__h(%[[VAL_5]]* %[[VAL_6]]) // CHECK: tail call void (i64, void (%[[VAL_2]]*, %[[VAL_5]]*)*, ...) 
@invokeWithControlQubits(i64 1, void (%[[VAL_2]]*, %[[VAL_5]]*)* nonnull @__quantum__qis__x__ctl, %[[VAL_5]]* %[[VAL_6]], %[[VAL_5]]* %[[VAL_9]]) // CHECK: %[[VAL_10:.*]] = tail call %[[VAL_11:.*]]* @__quantum__qis__mz(%[[VAL_5]]* %[[VAL_6]]) -// CHECK: %[[VAL_12:.*]] = bitcast %[[VAL_11]]* %[[VAL_10]] to i1* +// CHECK: %[[VAL_12:.*]] = bitcast %Result* %[[VAL_10]] to i1* // CHECK: %[[VAL_13:.*]] = load i1, i1* %[[VAL_12]], align 1 // CHECK: %[[VAL_14:.*]] = tail call %[[VAL_11]]* @__quantum__qis__mz(%[[VAL_5]]* %[[VAL_9]]) -// CHECK: %[[VAL_15:.*]] = bitcast %[[VAL_11]]* %[[VAL_14]] to i1* +// CHECK: %[[VAL_15:.*]] = bitcast %Result* %[[VAL_14]] to i1* // CHECK: %[[VAL_16:.*]] = load i1, i1* %[[VAL_15]], align 1 -// CHECK: %[[VAL_17:.*]] = getelementptr inbounds { i1, i1 }, { i1, i1 }* %[[VAL_0]], i64 0, i32 0 -// CHECK: store i1 %[[VAL_13]], i1* %[[VAL_17]], align 1 -// CHECK: %[[VAL_18:.*]] = getelementptr inbounds { i1, i1 }, { i1, i1 }* %[[VAL_0]], i64 0, i32 1 -// CHECK: store i1 %[[VAL_16]], i1* %[[VAL_18]], align 1 +// CHECK: %[[VAL_20:.*]] = insertvalue { i1, i1 } undef, i1 %[[VAL_13]], 0 +// CHECK: %[[VAL_19:.*]] = insertvalue { i1, i1 } %[[VAL_20]], i1 %[[VAL_16]], 1 // CHECK: tail call void @__quantum__rt__qubit_release_array(%[[VAL_2]]* %[[VAL_1]]) -// CHECK: ret void +// CHECK: ret { i1, i1 } %[[VAL_19]] // CHECK: } -// CHECK-LABEL: define void @test_1({ i1, i1 }* nocapture writeonly sret({ i1, i1 }) -// CHECK-SAME: %[[VAL_0:.*]], i8* nocapture readnone %[[VAL_1:.*]]) {{.*}}{ -// CHECK: %[[VAL_2:.*]] = alloca [2 x i8], align 8 -// CHECK: %[[VAL_3:.*]] = getelementptr inbounds [2 x i8], [2 x i8]* %[[VAL_2]], i64 0, i64 0 +// CHECK-LABEL: define i16 @test_1(i8* nocapture readnone +// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ +// CHECK-NEXT: %[[VAL_2:.*]] = alloca i16, align 8 +// CHECK: %[[VAL_3:.*]] = bitcast i16* %[[VAL_2]] to i8* // CHECK: call void @altLaunchKernel(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @test_1.kernelName, i64 0, i64 0), 
i8* nonnull bitcast ({ i8*, i64 } (i8*, i1)* @test_1.thunk to i8*), i8* nonnull %[[VAL_3]], i64 2, i64 0) -// CHECK: %[[VAL_4:.*]] = bitcast [2 x i8]* %[[VAL_2]] to i1* -// CHECK: %[[VAL_5:.*]] = load i1, i1* %[[VAL_4]], align 8 -// CHECK: %[[VAL_6:.*]] = getelementptr inbounds [2 x i8], [2 x i8]* %[[VAL_2]], i64 0, i64 1 -// CHECK: %[[VAL_7:.*]] = bitcast i8* %[[VAL_6]] to i1* -// CHECK: %[[VAL_8:.*]] = load i1, i1* %[[VAL_7]], align 1 -// CHECK: %[[VAL_9:.*]] = getelementptr inbounds { i1, i1 }, { i1, i1 }* %[[VAL_0]], i64 0, i32 0 -// CHECK: store i1 %[[VAL_5]], i1* %[[VAL_9]], align 1 -// CHECK: %[[VAL_10:.*]] = getelementptr inbounds { i1, i1 }, { i1, i1 }* %[[VAL_0]], i64 0, i32 1 -// CHECK: store i1 %[[VAL_8]], i1* %[[VAL_10]], align 1 -// CHECK: ret void +// CHECK: %[[VAL_4:.*]] = load i16, i16* %[[VAL_2]], align 8 +// CHECK: ret i16 %[[VAL_4]] // CHECK: } - +// struct{i16, f32, f64, i64} -> sret ptr func.func @__nvqpp__mlirgen__test_2() -> !cc.struct<{i16, f32, f64, i64}> { %rv = cc.undef !cc.struct<{i16, f32, f64, i64}> %c1 = arith.constant 8 : i16 @@ -200,10 +192,8 @@ func.func @test_2(%1: !cc.ptr> {llvm.sret = !cc return } -// CHECK-LABEL: define void @__nvqpp__mlirgen__test_2({ i16, float, double, i64 }* nocapture writeonly sret({ i16, float, double, i64 }) -// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ -// CHECK: store { i16, float, double, i64 } { i16 8, float 0x40159999A0000000, double 3.783000e+01, i64 1479 }, { i16, float, double, i64 }* %[[VAL_0]], align 8 -// CHECK: ret void +// CHECK-LABEL: define { i16, float, double, i64 } @__nvqpp__mlirgen__test_2() +// CHECK: ret { i16, float, double, i64 } { i16 8, float 0x40159999A0000000, double 3.783000e+01, i64 1479 } // CHECK: } // CHECK-LABEL: define void @test_2({ i16, float, double, i64 }* nocapture writeonly sret({ i16, float, double, i64 }) @@ -211,22 +201,12 @@ func.func @test_2(%1: !cc.ptr> {llvm.sret = !cc // CHECK: %[[VAL_2:.*]] = alloca { { i16, float, double, i64 } }, align 8 // CHECK: 
%[[VAL_3:.*]] = bitcast { { i16, float, double, i64 } }* %[[VAL_2]] to i8* // CHECK: call void @altLaunchKernel(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @test_2.kernelName, i64 0, i64 0), i8* nonnull bitcast ({ i8*, i64 } (i8*, i1)* @test_2.thunk to i8*), i8* nonnull %[[VAL_3]], i64 24, i64 0) -// CHECK: %[[VAL_4:.*]] = getelementptr inbounds { { i16, float, double, i64 } }, { { i16, float, double, i64 } }* %[[VAL_2]], i64 0, i32 0, i32 0 -// CHECK: %[[VAL_5:.*]] = load i16, i16* %[[VAL_4]], align 8 -// CHECK: %[[VAL_6:.*]] = insertvalue { i16, float, double, i64 } poison, i16 %[[VAL_5]], 0 -// CHECK: %[[VAL_7:.*]] = getelementptr inbounds { { i16, float, double, i64 } }, { { i16, float, double, i64 } }* %[[VAL_2]], i64 0, i32 0, i32 1 -// CHECK: %[[VAL_8:.*]] = load float, float* %[[VAL_7]], align 4 -// CHECK: %[[VAL_9:.*]] = insertvalue { i16, float, double, i64 } %[[VAL_6]], float %[[VAL_8]], 1 -// CHECK: %[[VAL_10:.*]] = getelementptr inbounds { { i16, float, double, i64 } }, { { i16, float, double, i64 } }* %[[VAL_2]], i64 0, i32 0, i32 2 -// CHECK: %[[VAL_11:.*]] = load double, double* %[[VAL_10]], align 8 -// CHECK: %[[VAL_12:.*]] = insertvalue { i16, float, double, i64 } %[[VAL_9]], double %[[VAL_11]], 2 -// CHECK: %[[VAL_13:.*]] = getelementptr inbounds { { i16, float, double, i64 } }, { { i16, float, double, i64 } }* %[[VAL_2]], i64 0, i32 0, i32 3 -// CHECK: %[[VAL_14:.*]] = load i64, i64* %[[VAL_13]], align 8 -// CHECK: %[[VAL_15:.*]] = insertvalue { i16, float, double, i64 } %[[VAL_12]], i64 %[[VAL_14]], 3 -// CHECK: store { i16, float, double, i64 } %[[VAL_15]], { i16, float, double, i64 }* %[[VAL_0]], align 8 +// CHECK: %[[VAL_4:.*]] = bitcast { i16, float, double, i64 }* %[[VAL_0]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(24) %[[VAL_4]], i8* noundef nonnull align 8 dereferenceable(24) %[[VAL_3]], i64 24, i1 false) // CHECK: ret void // CHECK: } +// array -> sret ptr func.func 
@__nvqpp__mlirgen__test_3() -> !cc.array { %rv = cc.undef !cc.array %c1 = arith.constant 5 : i64 @@ -246,19 +226,8 @@ func.func @test_3(%1: !cc.ptr> {llvm.sret = !cc.array> {llvm.sret = !cc.array { i64, f64 } func.func @__nvqpp__mlirgen__test_4() -> (i64, f64) { %c1 = arith.constant 537892 : i64 %c2 = arith.constant 94.2134 : f64 return %c1, %c2 : i64, f64 } -func.func @test_4(%1: !cc.ptr> {llvm.sret = !cc.struct<{i64, f64}>}, %this: !cc.ptr) { +func.func @test_4(%sret: !cc.ptr> {llvm.sret = !cc.struct<{i64, f64}>}, %this: !cc.ptr) { return } -// CHECK-LABEL: define void @__nvqpp__mlirgen__test_4({ i64, double }* nocapture writeonly sret({ i64, double }) -// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ -// CHECK: %[[VAL_1:.*]] = getelementptr inbounds { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 0 -// CHECK: store i64 537892, i64* %[[VAL_1]], align 8 -// CHECK: %[[VAL_2:.*]] = getelementptr { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 1 -// CHECK: store double 0x40578DA858793DD9, double* %[[VAL_2]], align 8 -// CHECK: ret void +// CHECK-LABEL: define { i64, double } @__nvqpp__mlirgen__test_4() {{.*}}{ +// CHECK: ret { i64, double } { i64 537892, double 0x40578DA858793DD9 } // CHECK: } // CHECK-LABEL: define void @test_4({ i64, double }* nocapture writeonly sret({ i64, double }) @@ -313,14 +260,8 @@ func.func @test_4(%1: !cc.ptr> {llvm.sret = !cc.struct<{i // CHECK: %[[VAL_2:.*]] = alloca { i64, double }, align 8 // CHECK: %[[VAL_3:.*]] = bitcast { i64, double }* %[[VAL_2]] to i8* // CHECK: call void @altLaunchKernel(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @test_4.kernelName, i64 0, i64 0), i8* nonnull bitcast ({ i8*, i64 } (i8*, i1)* @test_4.thunk to i8*), i8* nonnull %[[VAL_3]], i64 16, i64 0) -// CHECK: %[[VAL_4:.*]] = getelementptr inbounds { i64, double }, { i64, double }* %[[VAL_2]], i64 0, i32 0 -// CHECK: %[[VAL_5:.*]] = load i64, i64* %[[VAL_4]], align 8 -// CHECK: %[[VAL_6:.*]] = getelementptr inbounds { i64, double }, { i64, 
double }* %[[VAL_0]], i64 0, i32 0 -// CHECK: store i64 %[[VAL_5]], i64* %[[VAL_6]], align 8 -// CHECK: %[[VAL_7:.*]] = getelementptr inbounds { i64, double }, { i64, double }* %[[VAL_2]], i64 0, i32 1 -// CHECK: %[[VAL_8:.*]] = load double, double* %[[VAL_7]], align 8 -// CHECK: %[[VAL_9:.*]] = getelementptr { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 1 -// CHECK: store double %[[VAL_8]], double* %[[VAL_9]], align 8 +// CHECK: %[[VAL_4:.*]] = bitcast { i64, double }* %[[VAL_0]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(16) %[[VAL_4]], i8* noundef nonnull align 8 dereferenceable(16) %[[VAL_3]], i64 16, i1 false) // CHECK: ret void // CHECK: } @@ -330,17 +271,12 @@ func.func @__nvqpp__mlirgen__test_5() -> (i64, f64) attributes {no_this} { return %c1, %c2 : i64, f64 } -func.func @test_5(%0: !cc.ptr> {llvm.sret = !cc.struct<{i64, f64}>}) { +func.func @test_5(%sret: !cc.ptr> {llvm.sret = !cc.struct<{i64, f64}>}) { return } -// CHECK-LABEL: define void @__nvqpp__mlirgen__test_5({ i64, double }* nocapture writeonly sret({ i64, double }) -// CHECK-SAME: %[[VAL_0:.*]]) {{.*}}{ -// CHECK: %[[VAL_1:.*]] = getelementptr inbounds { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 0 -// CHECK: store i64 537892, i64* %[[VAL_1]], align 8 -// CHECK: %[[VAL_2:.*]] = getelementptr { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 1 -// CHECK: store double 0x40578DA858793DD9, double* %[[VAL_2]], align 8 -// CHECK: ret void +// CHECK-LABEL: define { i64, double } @__nvqpp__mlirgen__test_5() {{.*}}{ +// CHECK: ret { i64, double } { i64 537892, double 0x40578DA858793DD9 } // CHECK: } // CHECK-LABEL: define void @test_5({ i64, double }* nocapture writeonly sret({ i64, double }) @@ -348,14 +284,8 @@ func.func @test_5(%0: !cc.ptr> {llvm.sret = !cc.struct<{i // CHECK: %[[VAL_1:.*]] = alloca { i64, double }, align 8 // CHECK: %[[VAL_2:.*]] = bitcast { i64, double }* %[[VAL_1]] to i8* // CHECK: call void 
@altLaunchKernel(i8* nonnull getelementptr inbounds ([7 x i8], [7 x i8]* @test_5.kernelName, i64 0, i64 0), i8* nonnull bitcast ({ i8*, i64 } (i8*, i1)* @test_5.thunk to i8*), i8* nonnull %[[VAL_2]], i64 16, i64 0) -// CHECK: %[[VAL_3:.*]] = getelementptr inbounds { i64, double }, { i64, double }* %[[VAL_1]], i64 0, i32 0 -// CHECK: %[[VAL_4:.*]] = load i64, i64* %[[VAL_3]], align 8 -// CHECK: %[[VAL_5:.*]] = getelementptr inbounds { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 0 -// CHECK: store i64 %[[VAL_4]], i64* %[[VAL_5]], align 8 -// CHECK: %[[VAL_6:.*]] = getelementptr inbounds { i64, double }, { i64, double }* %[[VAL_1]], i64 0, i32 1 -// CHECK: %[[VAL_7:.*]] = load double, double* %[[VAL_6]], align 8 -// CHECK: %[[VAL_8:.*]] = getelementptr { i64, double }, { i64, double }* %[[VAL_0]], i64 0, i32 1 -// CHECK: store double %[[VAL_7]], double* %[[VAL_8]], align 8 +// CHECK: %[[VAL_3:.*]] = bitcast { i64, double }* %[[VAL_0]] to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 8 dereferenceable(16) %[[VAL_3]], i8* noundef nonnull align 8 dereferenceable(16) %[[VAL_2]], i64 16, i1 false) // CHECK: ret void // CHECK: } @@ -371,7 +301,6 @@ func.func @test_5(%0: !cc.ptr> {llvm.sret = !cc.struct<{i // CHECK-SAME: %[[VAL_0:.*]], i1 %[[VAL_1:.*]]) { // CHECK: %[[VAL_2:.*]] = bitcast i8* %[[VAL_0]] to i32* // CHECK: %[[VAL_3:.*]] = load i32, i32* %[[VAL_2]], align 4 -// CHECK: %[[VAL_4:.*]] = getelementptr i8, i8* %[[VAL_0]], i64 8 // CHECK: %[[VAL_5:.*]] = sext i32 %[[VAL_3]] to i64 // CHECK: %[[VAL_6:.*]] = tail call %[[VAL_7:.*]]* @__quantum__rt__qubit_allocate_array(i64 %[[VAL_5]]) // CHECK: %[[VAL_8:.*]] = tail call i64 @__quantum__rt__array_get_size_1d(%[[VAL_7]]* %[[VAL_6]]) @@ -392,7 +321,7 @@ func.func @test_5(%0: !cc.ptr> {llvm.sret = !cc.struct<{i // CHECK: ._crit_edge: ; preds = %[[VAL_10]] // CHECK: %[[VAL_23:.*]] = alloca i8, i64 %[[VAL_8]], align 1 // CHECK: br i1 %[[VAL_9]], label %[[VAL_24:.*]], label 
%[[VAL_14]] -// CHECK: .lr.ph4: ; preds = %[[VAL_22]], %[[VAL_24]] +// CHECK: [[VAL_24]]: ; preds = %[[VAL_22]], %[[VAL_24]] // CHECK: %[[VAL_25:.*]] = phi i64 [ %[[VAL_26:.*]], %[[VAL_24]] ], [ 0, %[[VAL_22]] ] // CHECK: %[[VAL_27:.*]] = tail call i8* @__quantum__rt__array_get_element_ptr_1d(%[[VAL_7]]* %[[VAL_6]], i64 %[[VAL_25]]) // CHECK: %[[VAL_28:.*]] = bitcast i8* %[[VAL_27]] to %[[VAL_19]]** @@ -406,20 +335,21 @@ func.func @test_5(%0: !cc.ptr> {llvm.sret = !cc.struct<{i // CHECK: %[[VAL_26]] = add nuw nsw i64 %[[VAL_25]], 1 // CHECK: %[[VAL_36:.*]] = icmp eq i64 %[[VAL_26]], %[[VAL_8]] // CHECK: br i1 %[[VAL_36]], label %[[VAL_14]], label %[[VAL_24]] -// CHECK: ._crit_edge5: ; preds = %[[VAL_24]], %[[VAL_11]], %[[VAL_22]] +// CHECK: [[VAL_14]]: ; preds = %[[VAL_24]], %[[VAL_11]], %[[VAL_22]] // CHECK: %[[VAL_37:.*]] = phi i8* [ %[[VAL_13]], %[[VAL_11]] ], [ %[[VAL_23]], %[[VAL_22]] ], [ %[[VAL_23]], %[[VAL_24]] ] // CHECK: %[[VAL_38:.*]] = call i8* @__nvqpp_vectorCopyCtor(i8* nonnull %[[VAL_37]], i64 %[[VAL_8]], i64 1) // CHECK: call void @__quantum__rt__qubit_release_array(%[[VAL_7]]* %[[VAL_6]]) -// CHECK: %[[VAL_39:.*]] = bitcast i8* %[[VAL_4]] to i8** -// CHECK: store i8* %[[VAL_38]], i8** %[[VAL_39]], align 8 -// CHECK: %[[VAL_40:.*]] = getelementptr i8, i8* %[[VAL_0]], i64 16 -// CHECK: %[[VAL_41:.*]] = bitcast i8* %[[VAL_40]] to i64* -// CHECK: store i64 %[[VAL_8]], i64* %[[VAL_41]], align 4 +// CHECK: %[[VAL_50:.*]] = getelementptr i8, i8* %[[VAL_0]], i64 8 +// CHECK: %[[VAL_51:.*]] = bitcast i8* %[[VAL_50]] to i8** +// CHECK: store i8* %[[VAL_38]], i8** %[[VAL_51]], align 8 +// CHECK: %[[VAL_52:.*]] = getelementptr i8, i8* %[[VAL_0]], i64 16 +// CHECK: %[[VAL_53:.*]] = bitcast i8* %[[VAL_52]] to i64* +// CHECK: store i64 %[[VAL_8]], i64* %[[VAL_53]], align 8 // CHECK: br i1 %[[VAL_1]], label %[[VAL_42:.*]], label %[[VAL_43:.*]] -// CHECK: common.ret: ; preds = %[[VAL_14]], %[[VAL_42]] +// CHECK: [[VAL_43]]: ; preds = %[[VAL_14]], %[[VAL_42]] // 
CHECK: %[[VAL_44:.*]] = phi { i8*, i64 } [ %[[VAL_45:.*]], %[[VAL_42]] ], [ zeroinitializer, %[[VAL_14]] ] // CHECK: ret { i8*, i64 } %[[VAL_44]] -// CHECK: 32: ; preds = %[[VAL_14]] +// CHECK: [[VAL_42]]: ; preds = %[[VAL_14]] // CHECK: %[[VAL_46:.*]] = add i64 %[[VAL_8]], 24 // CHECK: %[[VAL_47:.*]] = call i8* @malloc(i64 %[[VAL_46]]) // CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* noundef nonnull align 1 dereferenceable(24) %[[VAL_47]], i8* noundef nonnull align 1 dereferenceable(24) %[[VAL_0]], i64 24, i1 false) diff --git a/test/Quake/return_vector.qke b/test/Quake/return_vector.qke index 23a718bcc5..a13d0b6abe 100644 --- a/test/Quake/return_vector.qke +++ b/test/Quake/return_vector.qke @@ -29,16 +29,12 @@ func.func @test_0(%0: !cc.ptr, !cc.ptr, !cc.ptr, i64}>> {llvm.sret = !cc.struct<{!cc.ptr, i64}>}, %[[VAL_1:.*]]: i32) { +// CHECK-SAME: %[[VAL_1:.*]]: i32) -> !cc.stdvec { // CHECK-DAG: %[[VAL_2:.*]] = arith.constant 8 : i64 // CHECK-DAG: %[[VAL_3:.*]] = arith.constant 256 : i64 // CHECK: %[[VAL_4:.*]] = call @malloc(%[[VAL_3]]) : (i64) -> !cc.ptr -// CHECK: %[[VAL_5:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr) -> !cc.ptr -// CHECK: %[[VAL_9:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr, i64}>>) -> !cc.ptr> -// CHECK: cc.store %[[VAL_5]], %[[VAL_9]] : !cc.ptr> -// CHECK: %[[VAL_7:.*]] = cc.compute_ptr %[[VAL_0]][1] : (!cc.ptr, i64}>>) -> !cc.ptr -// CHECK: cc.store %[[VAL_2]], %[[VAL_7]] : !cc.ptr -// CHECK: return +// CHECK: %[[VAL_5:.*]] = cc.stdvec_init %[[VAL_4]], %[[VAL_2]] : (!cc.ptr, i64) -> !cc.stdvec +// CHECK: return %[[VAL_5]] : !cc.stdvec // CHECK: } // CHECK-LABEL: func.func @test_0( @@ -93,15 +89,11 @@ func.func @test_1(%0: !cc.ptr, !cc.ptr, !cc.ptr, i64}>> {llvm.sret = !cc.struct<{!cc.ptr, i64}>}, %[[VAL_1:.*]]: i32) { +// CHECK-SAME: %[[VAL_1:.*]]: i32) -> !cc.stdvec { // CHECK: %[[VAL_2:.*]] = arith.constant 9 : i64 // CHECK: %[[VAL_3:.*]] = arith.constant 520 : i64 // CHECK: %[[VAL_4:.*]] = call @malloc(%[[VAL_3]]) : (i64) -> !cc.ptr -// CHECK: 
%[[VAL_5:.*]] = cc.cast %[[VAL_4]] : (!cc.ptr) -> !cc.ptr -// CHECK: %[[VAL_8:.*]] = cc.cast %[[VAL_0]] : (!cc.ptr, i64}>>) -> !cc.ptr> -// CHECK: cc.store %[[VAL_5]], %[[VAL_8]] : !cc.ptr> -// CHECK: %[[VAL_7:.*]] = cc.compute_ptr %[[VAL_0]][1] : (!cc.ptr, i64}>>) -> !cc.ptr -// CHECK: cc.store %[[VAL_2]], %[[VAL_7]] : !cc.ptr +// CHECK: %[[VAL_5:.*]] = cc.stdvec_init %[[VAL_4]], %[[VAL_2]] : (!cc.ptr, i64) -> !cc.stdvec // CHECK: return // CHECK: } @@ -151,13 +143,13 @@ func.func @test_1(%0: !cc.ptr, !cc.ptr, !cc.ptr) -> !cc.ptr, i64}>}>> // CHECK: %[[VAL_4:.*]] = cc.load %[[VAL_3]] : !cc.ptr, i64}>}>> // CHECK: %[[VAL_7:.*]] = cc.sizeof !cc.struct<{i32, !cc.struct<{!cc.ptr, i64}>}> : i64 -// CHECK: %[[VAL_8:.*]] = cc.compute_ptr %[[VAL_3]][1] : (!cc.ptr, i64}>}>>) -> !cc.ptr, i64}>> -// CHECK: %[[VAL_9:.*]] = cc.cast %[[VAL_8]] : (!cc.ptr, i64}>>) -> !cc.ptr, i64}>> // CHECK: %[[VAL_10:.*]] = cc.extract_value %[[VAL_4]][0] : (!cc.struct<{i32, !cc.struct<{!cc.ptr, i64}>}>) -> i32 -// CHECK: call @__nvqpp__mlirgen__test_0(%[[VAL_9]], %[[VAL_10]]) : (!cc.ptr, i64}>>, i32) -> () +// CHECK: %[[VAL_15:.*]] = call @__nvqpp__mlirgen__test_0(%[[VAL_10]]) : (i32) -> !cc.stdvec // CHECK: cf.cond_br %[[VAL_1]], ^bb1, ^bb2 // CHECK: ^bb1: // CHECK: %[[VAL_11:.*]] = cc.compute_ptr %[[VAL_3]][1] : (!cc.ptr, i64}>}>>) -> !cc.ptr, i64}> +// CHECK: %[[VAL_16:.*]] = cc.cast %[[VAL_11]] : (!cc.ptr, i64}>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_15]], %[[VAL_16]] : !cc.ptr> // CHECK: %[[VAL_12:.*]] = cc.cast %[[VAL_11]] : (!cc.ptr, i64}>>) -> !cc.ptr, i64}>> // CHECK: %[[VAL_13:.*]] = call @__nvqpp_createDynamicResult(%[[VAL_0]], %[[VAL_7]], %[[VAL_12]]) : (!cc.ptr, i64, !cc.ptr, i64}>>) -> !cc.struct<{!cc.ptr, i64}> // CHECK: return %[[VAL_13]] : !cc.struct<{!cc.ptr, i64}> @@ -171,13 +163,13 @@ func.func @test_1(%0: !cc.ptr, !cc.ptr, !cc.ptr) -> !cc.ptr, i64}>}>> // CHECK: %[[VAL_4:.*]] = cc.load %[[VAL_3]] : !cc.ptr, i64}>}>> // CHECK: %[[VAL_7:.*]] = cc.sizeof !cc.struct<{i32, 
!cc.struct<{!cc.ptr, i64}>}> : i64 -// CHECK: %[[VAL_8:.*]] = cc.compute_ptr %[[VAL_3]][1] : (!cc.ptr, i64}>}>>) -> !cc.ptr, i64}>> -// CHECK: %[[VAL_9:.*]] = cc.cast %[[VAL_8]] : (!cc.ptr, i64}>>) -> !cc.ptr, i64}>> // CHECK: %[[VAL_10:.*]] = cc.extract_value %[[VAL_4]][0] : (!cc.struct<{i32, !cc.struct<{!cc.ptr, i64}>}>) -> i32 -// CHECK: call @__nvqpp__mlirgen__test_1(%[[VAL_9]], %[[VAL_10]]) : (!cc.ptr, i64}>>, i32) -> () +// CHECK: %[[VAL_15:.*]] = call @__nvqpp__mlirgen__test_1(%[[VAL_10]]) : (i32) -> !cc.stdvec // CHECK: cf.cond_br %[[VAL_1]], ^bb1, ^bb2 // CHECK: ^bb1: // CHECK: %[[VAL_11:.*]] = cc.compute_ptr %[[VAL_3]][1] : (!cc.ptr, i64}>}>>) -> !cc.ptr, i64}>> +// CHECK: %[[VAL_16:.*]] = cc.cast %[[VAL_11]] : (!cc.ptr, i64}>>) -> !cc.ptr> +// CHECK: cc.store %[[VAL_15]], %[[VAL_16]] : !cc.ptr> // CHECK: %[[VAL_12:.*]] = cc.cast %[[VAL_11]] : (!cc.ptr, i64}>>) -> !cc.ptr, i64}>> // CHECK: %[[VAL_13:.*]] = call @__nvqpp_createDynamicResult(%[[VAL_0]], %[[VAL_7]], %[[VAL_12]]) : (!cc.ptr, i64, !cc.ptr, i64}>>) -> !cc.struct<{!cc.ptr, i64}> // CHECK: return %[[VAL_13]] : !cc.struct<{!cc.ptr, i64}> From 949d44428d9911b18438f5c57d10b22c006a8b9f Mon Sep 17 00:00:00 2001 From: Eric Schweitz Date: Wed, 16 Oct 2024 13:17:30 -0700 Subject: [PATCH 2/5] [core] Add support for aarch64 calling conventions. (#2278) Fix calling conventions for aarch64 and add a regression test. Requires #2268 to be merged. 
Signed-off-by: Eric Schweitz --- lib/Optimizer/Builder/Factory.cpp | 56 ++- test/AST-Quake/calling_convention-aarch64.cpp | 326 ++++++++++++++++++ 2 files changed, 370 insertions(+), 12 deletions(-) create mode 100644 test/AST-Quake/calling_convention-aarch64.cpp diff --git a/lib/Optimizer/Builder/Factory.cpp b/lib/Optimizer/Builder/Factory.cpp index 73b66cdac3..421943c6c9 100644 --- a/lib/Optimizer/Builder/Factory.cpp +++ b/lib/Optimizer/Builder/Factory.cpp @@ -382,14 +382,15 @@ static Type convertToHostSideType(Type ty) { // integers of size 8, 16, 24, 32 or 64 together, unless the float member fits // by itself. static bool shouldExpand(SmallVectorImpl &packedTys, - cc::StructType structTy) { + cc::StructType structTy, unsigned scaling = 8) { if (structTy.isEmpty()) return false; auto *ctx = structTy.getContext(); unsigned bits = 0; + const auto scaleBy = scaling - 1; auto scaleBits = [&](unsigned size) { if (size < 32) - size = (size + 7) & ~7u; + size = (size + scaleBy) & ~scaleBy; if (size > 32 && size <= 64) size = 64; return size; @@ -525,6 +526,17 @@ static bool onlyArithmeticMembers(cc::StructType structTy) { return true; } +// Unchecked precondition: structTy must be entirely arithmetic. +static unsigned getLargestWidth(cc::StructType structTy) { + unsigned largest = 8; + for (auto ty : structTy.getMembers()) { + auto width = ty.getIntOrFloatBitWidth(); + if (width > largest) + largest = width; + } + return largest; +} + // When the kernel comes from a class, there is always a default `this` argument // to the kernel entry function. The CUDA-Q spec doesn't allow the kernel // object to contain data members (yet), so we can ignore the `this` pointer. 
@@ -534,16 +546,31 @@ FunctionType factory::toHostSideFuncType(FunctionType funcTy, bool addThisPtr, SmallVector inputTys; bool hasSRet = false; Type resultTy; + auto i64Ty = IntegerType::get(ctx, 64); if (funcTy.getNumResults() == 1) if (auto strTy = dyn_cast(funcTy.getResult(0))) if (strTy.getBitSize() != 0 && strTy.getBitSize() <= CommonSmallStructSize) { - SmallVector packedTys; - if (shouldExpand(packedTys, strTy) || !packedTys.empty()) { - if (packedTys.size() == 1) - resultTy = packedTys[0]; - else - resultTy = cc::StructType::get(ctx, packedTys); + if (isX86_64(module)) { + // X86_64: Byte addressable scaling (packed registers). Default is a + // struct. + SmallVector packedTys; + if (shouldExpand(packedTys, strTy) || !packedTys.empty()) { + if (packedTys.size() == 1) + resultTy = packedTys[0]; + else + resultTy = cc::StructType::get(ctx, packedTys); + } + } else if (isAArch64(module) && onlyArithmeticMembers(strTy)) { + // AARCH64: Padded registers. Default is a two-element array. + unsigned largest = getLargestWidth(strTy); + SmallVector packedTys; + if (shouldExpand(packedTys, strTy, largest) || !packedTys.empty()) { + if (packedTys.size() == 1) + resultTy = packedTys[0]; + else + resultTy = cc::ArrayType::get(ctx, packedTys[0], 2); + } } } if (!resultTy && funcTy.getNumResults()) { @@ -562,7 +589,6 @@ FunctionType factory::toHostSideFuncType(FunctionType funcTy, bool addThisPtr, } // If this kernel is a plain old function or a static member function, we // don't want to add a hidden `this` argument. - auto i64Ty = IntegerType::get(ctx, 64); auto ptrTy = cc::PointerType::get(IntegerType::get(ctx, 8)); if (addThisPtr) inputTys.push_back(ptrTy); @@ -592,9 +618,15 @@ FunctionType factory::toHostSideFuncType(FunctionType funcTy, bool addThisPtr, if (onlyArithmeticMembers(strTy)) { // Empirical evidence shows that on aarch64, arguments are packed // into a single i64 or a [2 x i64] typed value based on the size - // of the struct. 
This is regardless of whether the value(s) are - // floating-point or not. - if (strTy.getBitSize() > 64) + // of the struct. The exception is when there are 2 elements and + // they are both float or both double. + if ((strTy.getMembers().size() == 2) && + (strTy.getMember(0) == strTy.getMember(1)) && + ((strTy.getMember(0) == Float32Type::get(ctx)) || + (strTy.getMember(0) == Float64Type::get(ctx)))) + inputTys.push_back( + cc::ArrayType::get(ctx, strTy.getMember(0), 2)); + else if (strTy.getBitSize() > 64) inputTys.push_back(cc::ArrayType::get(ctx, i64Ty, 2)); else inputTys.push_back(i64Ty); diff --git a/test/AST-Quake/calling_convention-aarch64.cpp b/test/AST-Quake/calling_convention-aarch64.cpp new file mode 100644 index 0000000000..174aaf3558 --- /dev/null +++ b/test/AST-Quake/calling_convention-aarch64.cpp @@ -0,0 +1,326 @@ +/******************************************************************************* + * Copyright (c) 2022 - 2024 NVIDIA Corporation & Affiliates. * + * All rights reserved. * + * * + * This source code and the accompanying materials are made available under * + * the terms of the Apache License 2.0 which accompanies this distribution. * + ******************************************************************************/ + +// This test is only valid for aarch64. +// RUN: if [ `uname -m` = "aarch64" ] ; then \ +// RUN: cudaq-quake %cpp_std %s | cudaq-opt | FileCheck %s ; fi + +#include +#include +#include + +// Tests the host-side signatures of various spec supported kernel arguments and +// results. This file tests the aarch64 calling convention. Other architectures +// differ in their calling conventions. + +//===----------------------------------------------------------------------===// +// test all the basic arithmetic types to deny any regressions. 
+ +struct T0 { + void operator()() __qpu__ {} +}; + +struct T1 { + void operator()(double arg) __qpu__ {} +}; + +struct T2 { + void operator()(float arg) __qpu__ {} +}; + +struct T3 { + void operator()(long long arg) __qpu__ {} +}; + +struct T4 { + void operator()(long arg) __qpu__ {} +}; + +struct T5 { + void operator()(int arg) __qpu__ {} +}; + +struct T6 { + void operator()(short arg) __qpu__ {} +}; + +struct T7 { + void operator()(char arg) __qpu__ {} +}; + +struct T8 { + void operator()(bool arg) __qpu__ {} +}; + +// CHECK-LABEL: func.func @_ZN2T0clEv( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr) { +// CHECK-LABEL: func.func @_ZN2T1clEd( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: f64) { +// CHECK-LABEL: func.func @_ZN2T2clEf( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: f32) { +// CHECK-LABEL: func.func @_ZN2T3clEx( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i64) { +// CHECK-LABEL: func.func @_ZN2T4clEl( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i64) { +// CHECK-LABEL: func.func @_ZN2T5clEi( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i32) { +// CHECK-LABEL: func.func @_ZN2T6clEs( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i16) { +// CHECK-LABEL: func.func @_ZN2T7clEc( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i8) { +// CHECK-LABEL: func.func @_ZN2T8clEb( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i1) { + +struct R0 { + void operator()() __qpu__ {} +}; + +struct R1 { + double operator()() __qpu__ { return {}; } +}; + +struct R2 { + float operator()() __qpu__ { return {}; } +}; + +struct R3 { + long long operator()() __qpu__ { return {}; } +}; + +struct R4 { + long operator()() __qpu__ { return {}; } +}; + +struct R5 { + int operator()() __qpu__ { return {}; } +}; + +struct R6 { + short operator()() __qpu__ { return {}; } +}; + +struct R7 { + char operator()() __qpu__ { return {}; } +}; + +struct R8 { + bool operator()() __qpu__ { return {}; } +}; + +// CHECK-LABEL: 
func.func @_ZN2R0clEv(%arg0: !cc.ptr) { +// CHECK-LABEL: func.func @_ZN2R1clEv(%arg0: !cc.ptr) -> f64 { +// CHECK-LABEL: func.func @_ZN2R2clEv(%arg0: !cc.ptr) -> f32 { +// CHECK-LABEL: func.func @_ZN2R3clEv(%arg0: !cc.ptr) -> i64 { +// CHECK-LABEL: func.func @_ZN2R4clEv(%arg0: !cc.ptr) -> i64 { +// CHECK-LABEL: func.func @_ZN2R5clEv(%arg0: !cc.ptr) -> i32 { +// CHECK-LABEL: func.func @_ZN2R6clEv(%arg0: !cc.ptr) -> i16 { +// CHECK-LABEL: func.func @_ZN2R7clEv(%arg0: !cc.ptr) -> i8 { +// CHECK-LABEL: func.func @_ZN2R8clEv(%arg0: !cc.ptr) -> i1 { + +//===----------------------------------------------------------------------===// +// structs that are less than 128 bits. +// arguments may be merged into 1 register or passed in pair of registers. +// results are returned in registers. + +struct G0 { + std::pair operator()(std::pair) __qpu__ { + return {}; + } +}; + +struct G1 { + std::pair operator()(std::pair) __qpu__ { + return {}; + } +}; + +struct G2 { + std::pair operator()(std::pair, + std::pair) __qpu__ { + return {}; + } +}; + +struct G3 { + std::pair operator()(std::pair) __qpu__ { + return {}; + } +}; + +struct BB { + bool _1; + bool _2; + bool _3; +}; + +BB glue0(); + +struct G4 { + std::pair operator()(BB) __qpu__ { return {}; } +}; + +struct II { + int _1; + int _2; + int _3; +}; + +II glue1(); + +struct G5 { + std::pair operator()(II) __qpu__ { return {}; } +}; + +struct CC { + char _1; + unsigned char _2; + signed char _3; +}; + +CC glue2(); + +struct G6 { + std::pair operator()(CC) __qpu__ { return {}; } +}; + +struct G7 { + BB operator()(BB, II, CC) __qpu__ { return glue0(); } +}; + +struct G8 { + II operator()(II, CC, BB) __qpu__ { return glue1(); } +}; + +struct G9 { + CC operator()(CC, BB, II) __qpu__ { return glue2(); } +}; + +// clang-format off +// CHECK-LABEL: func.func @_ZN2G0clESt4pairIddE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: !cc.array) -> i16 +// CHECK-LABEL: func.func @_ZN2G1clESt4pairIffE( +// CHECK-SAME: %[[VAL_0:.*]]: 
!cc.ptr, %[[VAL_1:.*]]: !cc.array) -> i16 +// CHECK-LABEL: func.func @_ZN2G2clESt4pairIllES0_IidE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: !cc.array, %[[VAL_2:.*]]: !cc.array) -> i32 +// CHECK-LABEL: func.func @_ZN2G3clESt4pairIdbE( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: !cc.array) -> i32 +// CHECK-LABEL: func.func @_ZN2G4clE2BB( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: i64) -> i64 +// CHECK-LABEL: func.func @_ZN2G5clE2II( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: !cc.array) -> !cc.array +// CHECK-LABEL: func.func @_ZN2G6clE2CC( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: i64) -> !cc.array +// CHECK-LABEL: func.func @_ZN2G7clE2BB2II2CC( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: i64, +// CHECK-SAME: %[[VAL_3:.*]]: !cc.array, %[[VAL_4:.*]]: i64) -> i24 +// CHECK-LABEL: func.func @_ZN2G8clE2II2CC2BB( +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, %[[VAL_2:.*]]: !cc.array, +// CHECK-SAME: %[[VAL_4:.*]]: i64, %[[VAL_5:.*]]: i64) -> !cc.array +// CHECK-LABEL: func.func @_ZN2G9clE2CC2BB2II( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: i64, %[[VAL_2:.*]]: i64, %[[VAL_3:.*]]: !cc.array) -> i24 +// clang-format on + +//===----------------------------------------------------------------------===// +// std::vector - these get converted to sret and byval ptrs on host side. 
+ +std::vector make_believe(); + +struct V0 { + std::vector operator()() __qpu__ { return make_believe(); } +}; + +std::vector make_coffee(); + +struct V1 { + std::vector operator()(std::vector) __qpu__ { + return make_coffee(); + } +}; + +std::vector> make_crazy(); + +struct V2 { + std::vector> operator()(std::vector, + std::vector) __qpu__ { + return make_crazy(); + } +}; + +struct V3 { + void operator()(std::vector, std::vector) __qpu__ {} +}; + +// clang-format off +// CHECK-LABEL: func.func @_ZN2V0clEv( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>> {llvm.sret = !cc.struct<{!cc.ptr, !cc.ptr, !cc.ptr}>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr) +// CHECK-LABEL: func.func @_ZN2V1clESt6vectorIdSaIdEE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>> {llvm.sret = !cc.struct<{!cc.ptr, !cc.ptr, !cc.ptr}>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>) +// CHECK-LABEL: func.func @_ZN2V2clESt6vectorIfSaIfEES0_IsSaIsEE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr>, !cc.ptr>, !cc.ptr>}>> {llvm.sret = !cc.struct<{!cc.ptr>, !cc.ptr>, !cc.ptr>}>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>, +// CHECK-SAME: %[[VAL_3:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>) +// CHECK-LABEL: func.func @_ZN2V3clESt6vectorIlSaIlEES0_IbSaIbEE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr, !cc.ptr, !cc.ptr}>>) +// clang-format on + +//===----------------------------------------------------------------------===// +// structs that are more than 128 bits. These get converted to sret or byval +// ptrs on the host side. 
+ +struct B0 { + void operator()(std::tuple) __qpu__ {} +}; + +struct BG { + float _1[4]; + int _2[5]; +}; + +BG make_sausage(); + +struct B1 { + BG operator()() __qpu__ { return make_sausage(); } +}; + +std::tuple make_interesting(); + +struct B2 { + std::tuple operator()(BG) __qpu__ { + return make_interesting(); + } +}; + +struct BA { + bool _1[64]; +}; + +struct B3 { + BA operator()(BA arg) __qpu__ { return arg; } +}; + +// clang-format off +// CHECK-LABEL: func.func @_ZN2B0clESt5tupleIJdicfsEE( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, %[[VAL_1:.*]]: !cc.ptr>) { +// CHECK-LABEL: func.func @_ZN2B1clEv( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr, !cc.array} [288,4]>> {llvm.sret = !cc.struct<"BG" {!cc.array, !cc.array} [288,4]>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr) +// CHECK-LABEL: func.func @_ZN2B2clE2BG( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr> {llvm.sret = !cc.struct<{f64, f64, i16, f32, i8, i32}>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr, !cc.array} [288,4]>>) +// CHECK-LABEL: func.func @_ZN2B3clE2BA( +// CHECK-SAME: %[[VAL_0:.*]]: !cc.ptr} [512,1]>> {llvm.sret = !cc.struct<"BA" {!cc.array} [512,1]>}, +// CHECK-SAME: %[[VAL_1:.*]]: !cc.ptr, +// CHECK-SAME: %[[VAL_2:.*]]: !cc.ptr} [512,1]>>) +// clang-format on From 490930d4543502e2baa568bfc2c6844edc1d08ba Mon Sep 17 00:00:00 2001 From: Bettina Heim Date: Thu, 17 Oct 2024 01:32:33 +0200 Subject: [PATCH 3/5] Workflow changes to facilitate maintenance (#2269) --- .github/workflows/ci.yml | 5 +- .github/workflows/clean_up.yml | 2 +- .github/workflows/codeql.yml | 81 +++++++++++++++++++ .../workflows/config/spellcheck_config.yml | 3 +- .github/workflows/dco_merge_queue.yml | 17 ++++ .github/workflows/gh_registry.yml | 1 - .github/workflows/integration_tests.yml | 17 ++-- .github/workflows/nvqc_regression_tests.yml | 8 +- .github/workflows/publishing.yml | 2 + .github/workflows/repo_checks.yml | 5 +- 10 files changed, 120 insertions(+), 21 deletions(-) create mode 100644 
.github/workflows/codeql.yml create mode 100644 .github/workflows/dco_merge_queue.yml diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c2bba1fddd..d94617a3b2 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -15,7 +15,10 @@ on: push: branches: - "pull-request/[0-9]+" - + merge_group: + types: + - checks_requested + name: CI # do not change name without updating workflow_run triggers concurrency: diff --git a/.github/workflows/clean_up.yml b/.github/workflows/clean_up.yml index 1d154c9f28..9bc2dbc77e 100644 --- a/.github/workflows/clean_up.yml +++ b/.github/workflows/clean_up.yml @@ -42,7 +42,7 @@ jobs: /repos/nvidia/cuda-quantum/git/refs/heads/$branch done env: - GH_TOKEN: ${{ github.token }} + GH_TOKEN: ${{ secrets.REPO_BOT_ACCESS_TOKEN || github.token }} draft_releases: name: Delete draft release diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 0000000000..338801009c --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,81 @@ +# This workflow is used to override the set of languages analyzed, +# and to provide custom queries or build logic. + +name: "CodeQL Advanced" + +on: + push: + branches: + - 'main' + - 'releases/*' + - 'experimental/*' + - 'features/*' + pull_request: + branches: + - 'main' + - 'releases/*' + - 'experimental/*' + - 'features/*' + schedule: + - cron: '37 9 * * 2' + +jobs: + analyze: + name: Analyze (${{ matrix.language }}) + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners (GitHub.com only) + # Consider using larger runners or machines with greater resources for possible analysis time improvements. 
+ runs-on: 'ubuntu-latest' + permissions: + security-events: write + # read permissions below are only required for workflows in private repositories + actions: read + contents: read + + strategy: + fail-fast: false + matrix: + include: + # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, + # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. + # For analyzing a compiled language, you can modify the 'build-mode' for that language to customize how + # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages + - language: c-cpp + build-mode: none # Consider setting this to manual and providing a build command to get more accurate scanning + - language: python + build-mode: none + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v3 + with: + languages: ${{ matrix.language }} + build-mode: ${{ matrix.build-mode }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + # If the analyze step fails for one of the languages you are analyzing with + # "We were unable to automatically build your code", modify the matrix above + # to set the build mode to "manual" for that language. 
Then modify this step + # to build your code. + - if: matrix.build-mode == 'manual' + shell: bash + run: | + echo 'If you are using a "manual" build mode for one or more of the' \ + 'languages you are analyzing, replace this with the commands to build' \ + 'your code, for example:' + exit 1 + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v3 + with: + category: "/language:${{matrix.language}}" diff --git a/.github/workflows/config/spellcheck_config.yml b/.github/workflows/config/spellcheck_config.yml index e52e7b22d8..24642537f3 100644 --- a/.github/workflows/config/spellcheck_config.yml +++ b/.github/workflows/config/spellcheck_config.yml @@ -45,7 +45,8 @@ matrix: - name: rst sources: - - '**/*.rst' + - '**/*.rst|!docs/sphinx/_templates/**/*.rst' + glob_flags: N|G|B expect_match: false aspell: lang: en diff --git a/.github/workflows/dco_merge_queue.yml b/.github/workflows/dco_merge_queue.yml new file mode 100644 index 0000000000..531b516eb0 --- /dev/null +++ b/.github/workflows/dco_merge_queue.yml @@ -0,0 +1,17 @@ +on: + # Do not add any triggers here! + merge_group: + types: + - checks_requested + +name: DCO (merge queue) + +permissions: {} # no permissions needed. + +jobs: + DCO: + runs-on: ubuntu-latest + steps: + - run: + echo "Enable merge_queue check to pass." + echo "No checks were performed; these checks were already performed during PR workflows." 
\ No newline at end of file diff --git a/.github/workflows/gh_registry.yml b/.github/workflows/gh_registry.yml index da51650fbf..eca25851bf 100644 --- a/.github/workflows/gh_registry.yml +++ b/.github/workflows/gh_registry.yml @@ -8,7 +8,6 @@ name: Update GHCR jobs: ghcr_config: name: Read GHCR config - if: github.event_name == 'workflow_dispatch' || vars.enabled_workflows == 'all' runs-on: ubuntu-latest permissions: contents: read diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index f2ef1ada4c..a3056c056f 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -34,7 +34,7 @@ on: cudaq_test_image: type: string required: false - default: 'nvcr.io/nvidia/nightly/cuda-quantum:latest' # If changed, update env defaults, too + default: '' # picked up from repo variable if not provided description: 'CUDA Quantum image to run the tests in. Default to the latest CUDA Quantum nightly image' commit_sha: type: string @@ -43,7 +43,7 @@ on: cudaq_nvqc_deploy_image: type: string required: false - default: 'nvcr.io/nvidia/nightly/cuda-quantum:latest' # If changed, update env defaults, too + default: '' # same as cudaq_test_image if not provided description: 'CUDA Quantum image to use for NVQC deployment to NVCF. Default to the latest CUDA Quantum nightly image' workflow_id: type: string @@ -52,7 +52,6 @@ on: python_version: type: choice required: true - default: '3.10' # If changed, update env defaults, too description: 'Python version to run wheel test' options: - '3.8' @@ -70,10 +69,6 @@ env: NVQC_FUNCTION_ID: 3bfa0342-7d2a-4f1b-8e81-b6608d28ca7d # :::: NGC_NVQC_DEPLOYMENT_SPEC: GFN:L40S:gl40s_1.br25_2xlarge:1:1 - # If vars below are changed, it is recommended to also update the - # workflow_dispatch defaults above so they stay in sync. 
- cudaq_test_image: nvcr.io/nvidia/nightly/cuda-quantum:latest - cudaq_nvqc_deploy_image: nvcr.io/nvidia/nightly/cuda-quantum:latest python_version: '3.10' jobs: @@ -84,11 +79,13 @@ jobs: runs-on: ubuntu-latest outputs: cudaq_test_image: ${{ steps.vars.outputs.cudaq_test_image }} + cudaq_nvqc_deploy_image: ${{ steps.vars.outputs.cudaq_nvqc_deploy_image }} steps: - name: Set variables id: vars run: | - echo "cudaq_test_image=${{ inputs.cudaq_test_image || env.cudaq_test_image }}" >> $GITHUB_OUTPUT + echo "cudaq_test_image=${{ inputs.cudaq_test_image || vars.cudaq_test_image }}" >> $GITHUB_OUTPUT + echo "cudaq_nvqc_deploy_image=${{ inputs.cudaq_nvqc_deploy_image || vars.cudaq_test_image }}" >> $GITHUB_OUTPUT metadata: name: Retrieve commit info @@ -112,7 +109,7 @@ jobs: build_nvqc_image: name: Build NVQC deployment image runs-on: ubuntu-latest - needs: metadata + needs: [setup, metadata] environment: ghcr-deployment if: (inputs.target == 'nvqc' || github.event_name == 'schedule' || inputs.target == 'nightly') steps: @@ -153,7 +150,7 @@ jobs: context: . file: ./docker/release/cudaq.nvqc.Dockerfile build-args: | - base_image=${{ inputs.cudaq_nvqc_deploy_image || env.cudaq_nvqc_deploy_image }} + base_image=${{ needs.setup.outputs.cudaq_nvqc_deploy_image }} tags: nvcr.io/${{ env.NGC_QUANTUM_ORG }}/${{ env.NGC_QUANTUM_TEAM }}/cuda-quantum:nightly platforms: linux/amd64 provenance: false diff --git a/.github/workflows/nvqc_regression_tests.yml b/.github/workflows/nvqc_regression_tests.yml index 10cab6a077..857343501c 100644 --- a/.github/workflows/nvqc_regression_tests.yml +++ b/.github/workflows/nvqc_regression_tests.yml @@ -12,7 +12,7 @@ on: cudaq_test_image: type: string required: false - default: 'nvcr.io/nvidia/nightly/cuda-quantum:latest' # If changed, update env defaults, too + default: '' # picked up from repo variable if not provided description: 'CUDA Quantum image to run the tests in. 
Default to the latest CUDA Quantum nightly image' commit_sha: type: string @@ -25,7 +25,6 @@ on: python_version: type: choice required: true - default: '3.10' # If changed, update env defaults, too description: 'Python version to run wheel test' options: - '3.8' @@ -37,9 +36,6 @@ on: - cron: 0 3 * * * env: - # If vars below are changed, it is recommended to also update the - # workflow_dispatch defaults above so they stay in sync. - cudaq_test_image: nvcr.io/nvidia/nightly/cuda-quantum:latest python_version: '3.10' jobs: @@ -54,7 +50,7 @@ jobs: - name: Set variables id: vars run: | - echo "cudaq_test_image=${{ inputs.cudaq_test_image || env.cudaq_test_image }}" >> $GITHUB_OUTPUT + echo "cudaq_test_image=${{ inputs.cudaq_test_image || vars.cudaq_test_image }}" >> $GITHUB_OUTPUT metadata: name: Retrieve commit info diff --git a/.github/workflows/publishing.yml b/.github/workflows/publishing.yml index 695529c3bc..72d23b7062 100644 --- a/.github/workflows/publishing.yml +++ b/.github/workflows/publishing.yml @@ -235,6 +235,8 @@ jobs: for info_file in ${{ join(fromJson(steps.artifacts.outputs.installers).info_files, ' ') }}; do delete_staging_branch $info_file '${{ steps.artifacts.outputs.installers }}' done + env: + GH_TOKEN: ${{ secrets.REPO_BOT_ACCESS_TOKEN }} - name: Retrieve id: assets_retrieval diff --git a/.github/workflows/repo_checks.yml b/.github/workflows/repo_checks.yml index f5d92d5b1f..d5f4d324c5 100644 --- a/.github/workflows/repo_checks.yml +++ b/.github/workflows/repo_checks.yml @@ -1,6 +1,9 @@ on: workflow_dispatch: pull_request: + merge_group: + types: + - checks_requested name: "Basic content checks" @@ -85,7 +88,7 @@ jobs: create_output cxx_examples 'docs/sphinx/examples/**/*.cpp' create_output python '*.py :!:python/tests :!:test :!:targettests :!:tpls :!:docs/sphinx/conf.py' create_output markdown '*.md :!:tpls' - create_output rst '*.rst :!:tpls' + create_output rst '*.rst :!:tpls :!:docs/sphinx/_templates/**/*.rst' echo "json=$(echo $json)" >> 
$GITHUB_OUTPUT formatting: From 5fbbfcf9bb1b44138f02bf5d6c082b6e3ab062d0 Mon Sep 17 00:00:00 2001 From: Zohim Chandani <60148725+zohimchandani@users.noreply.github.com> Date: Thu, 17 Oct 2024 01:56:49 +0100 Subject: [PATCH 4/5] Restructure of examples and applications (#2218) * new tutorial * updates * Add ipywidgets to devdeps image for docs generation * Formatting and spelling * cleanup of docs * restructure of examples and applications pages * adding a blank line * ran digitized_counterdiabatic_qaoa.ipynb to save the output * removing tests as the Python (*.py) files have been moved out (or condensed) as part of PR #2218 * commenting out nvidia target and fixing duplicate htest definition * saving output from advanced vqe * updates * renaming tutorials -> applications * purging unused rst files * * Adding missing image in the images folder * Correcting the image path in ipynb * Setting nbsphinx_widgets_path to empty string (to resolve this warning generated during sphinx docs generation WARNING: nbsphinx_widgets_path not given and ipywidgets module unavailable) * adding double quotes around the target * changing number of qpus to pass CI * adding intro.py and making changes to rst file to include ipynb file * moving applications, examples, targets into dedicated folders * changing path * adding applications and targets directories * changing path for hardware providers and few python files * fixing path * adding paths * adding applications path * adding python notebook directly in rst file * removign maxdepth * fixing paths * adding targets path * adding paths * removing providers and tutorials path * clearing output * adding a building_kernels python file in order to render it under tabs * correcting spelling check and formatting * excluding /building_kernels.py * adding missing examples and removing warnings from the output * commenting nvidia target * removing nbsphinx_widgets_path * changing VQE title * DCO Remediation Commit for Sachin Pisal I, Sachin Pisal 
, hereby add my Signed-off-by to this commit: 7a4adc5f4667db221c5240c4db7a6c2a64dd6fbc I, Sachin Pisal , hereby add my Signed-off-by to this commit: d8f441077b8c43059a01002fd4f36a7f0a041e38 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 6ce66956780d64cae98786fe9f106a728f9febd1 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 636bcc0ae4acd254cfe71ec27cedb952117c5afa I, Sachin Pisal , hereby add my Signed-off-by to this commit: 9321724fac88590616d3846c241766fa594a015b I, Sachin Pisal , hereby add my Signed-off-by to this commit: 72ead24ddc77a79015aec084b4dc18de74b110f0 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 5517077919e97c417c4de8352c0c94c2e1f32eea I, Sachin Pisal , hereby add my Signed-off-by to this commit: 0eb6b444eb5b3a687e6fd64529ee9223aaa2870e I, Sachin Pisal , hereby add my Signed-off-by to this commit: 91c317b7f69490cb98c66bceb987407456f5afd5 I, Sachin Pisal , hereby add my Signed-off-by to this commit: ee09e73f34005167169d268162ba16b4d62f38f0 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 9f17e737eddb740f74e0ee39f1669cac4bc3f32b I, Sachin Pisal , hereby add my Signed-off-by to this commit: e47acd2654b0fad608cb056b97c03c3bf85eedc9 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 4e24921337a44f1a9994d810767a8add571a91ab I, Sachin Pisal , hereby add my Signed-off-by to this commit: 1f412a6beeb1de6a0330eafde8ff41996a3228e9 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 3b580a12775e03221952192e619c204186912247 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 8fd380ec8cfc7816267bc959704a54bcd8aef6c5 I, Sachin Pisal , hereby add my Signed-off-by to this commit: f1df271e3e402b488e57f6b50a3d72b701ab422c I, Sachin Pisal , hereby add my Signed-off-by to this commit: 9eb9ab8262b5f75bdf356418f1ca9b5a40cedeb8 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 2eb4882ee7f124e14c2bc69f642a0c0918c24a1e I, Sachin Pisal , hereby add my Signed-off-by to this 
commit: ffd54a59fb33993857be33961839bd134540ad10 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 8b702e6a4c00fed996f8c785173ae2a9bf743468 I, Sachin Pisal , hereby add my Signed-off-by to this commit: f2cb3c6a111cbdaf8e2a863da9f46447f2b6e4f6 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 00490c876572d6217d0113f848d00eecb9815179 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 0e5ec8e0651b5bc22bb1c1daffbb2cc5303632d0 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 7553eedbae56203f136d9e90c64ffaa8339bfe4b I, Sachin Pisal , hereby add my Signed-off-by to this commit: 8782cef46beb1a3e0183f76ff7d2e088e3f5a8eb I, Sachin Pisal , hereby add my Signed-off-by to this commit: 2afb6bda42a353331fac56c5e4700705663b6c50 I, Sachin Pisal , hereby add my Signed-off-by to this commit: 17ac8008ee9958384151589fa50fe88c81bac3ee I, Sachin Pisal , hereby add my Signed-off-by to this commit: 79879eab6b0044a84bfaa1976a1e290d4dc2dfcc I, Sachin Pisal , hereby add my Signed-off-by to this commit: 5ebd7177ed870f4a62c3dbcf067afb42c260c51f I, Sachin Pisal , hereby add my Signed-off-by to this commit: b8076e8f08c9404890cac383ef8f5754729f6fa9 Signed-off-by: Sachin Pisal * DCO Remediation Commit for Ben Howe I, Ben Howe , hereby add my Signed-off-by to this commit: 6153db0a2122bfb7767c9a55071e7be26f77df96 I, Ben Howe , hereby add my Signed-off-by to this commit: 078c3eda9651ea1fd420dbe93fb5156672b76473 Signed-off-by: Ben Howe * sign * * Adding applications and targets folder * Removing unnecessary binary files * adding pycudaq test for intro.py * removing warnings from the cell output --------- Signed-off-by: Sachin Pisal Signed-off-by: Ben Howe Co-authored-by: Ben Howe Co-authored-by: Ben Howe <141149032+bmhowe23@users.noreply.github.com> Co-authored-by: Sachin Pisal Co-authored-by: root --- .../workflows/config/spellcheck_config.yml | 2 + .github/workflows/integration_tests.yml | 10 +- .github/workflows/nvqc_regression_tests.yml | 6 +- 
.github/workflows/publishing.yml | 6 +- .github/workflows/python_wheels.yml | 2 +- .github/workflows/repo_checks.yml | 2 +- .licenserc.yaml | 2 + Developing.md | 8 +- docker/build/assets.Dockerfile | 2 + docker/release/cudaq.Dockerfile | 5 +- docker/test/installer/linux.Dockerfile | 2 + docker/test/wheels/debian.Dockerfile | 2 + docker/test/wheels/fedora.Dockerfile | 2 + docker/test/wheels/opensuse.Dockerfile | 2 + docker/test/wheels/redhat.Dockerfile | 2 + docker/test/wheels/ubuntu.Dockerfile | 2 + docs/CMakeLists.txt | 53 +- .../cpp}/amplitude_estimation.cpp | 0 .../cpp}/bernstein_vazirani.cpp | 0 .../cpp}/grover.cpp | 0 .../cpp}/iterative_qpe.cpp | 0 .../cpp}/phase_estimation.cpp | 0 .../cpp}/qaoa_maxcut.cpp | 0 .../cpp}/random_walk_qpe.cpp | 0 .../cpp}/trotter_kernel_mode.cpp | 0 .../cpp}/vqe_h2.cpp | 0 docs/sphinx/applications/python/afqmc.ipynb | 533 +++++++++++++ .../python}/afqmc_src/geo_fenta.xyz | 0 .../python}/afqmc_src/geo_o3.xyz | 0 .../python}/afqmc_src/utils_ipie.py | 0 .../python}/afqmc_src/vqe_cudaq_qnp.py | 0 .../python/bernstein_vazirani.ipynb | 248 ++++++ .../python}/cost_minimization.ipynb | 19 +- .../python}/deutschs_algorithm.ipynb | 11 +- .../digitized_counterdiabatic_qaoa.ipynb | 429 ++++++++++ .../divisive_clustering_coresets.ipynb} | 90 ++- .../divisive_clustering.py | 0 .../main_divisive_clustering.py | 0 .../applications/python/hadamard_test.ipynb | 472 +++++++++++ .../python}/hybrid_qnns.ipynb | 10 +- .../applications/python/images/QKSD.png | Bin 0 -> 339921 bytes .../python/images/QVprocedure.png | Bin 0 -> 295104 bytes .../sphinx/applications/python/images/VQE.png | Bin 0 -> 40678 bytes .../python}/images/automatic_batching.png | Bin .../sphinx/applications/python/images/cas.png | Bin 0 -> 16548 bytes .../python}/images/circsplit.png | Bin .../python}/images/circuit_encoding.png | Bin .../python}/images/deutsch.png | Bin .../applications/python/images/docking.png | Bin 0 -> 272818 bytes .../applications/python/images/gate-fuse.png | 
Bin 0 -> 49959 bytes .../python}/images/hsplit.png | Bin .../applications/python/images/htest.png | Bin 0 -> 17786 bytes .../python/images/htestfactored.png | Bin 0 -> 17162 bytes .../python}/images/hybrid.png | Bin .../python}/images/inverse_qft.png | Bin .../applications/python/images/kakdecomp.png | Bin 0 -> 58288 bytes .../python/images/krylovcircuit.png | Bin 0 -> 25423 bytes .../python}/images/max-cut-illustration.png | Bin .../python}/images/oracle.png | Bin .../python/images/parametershift.png | Bin 0 -> 11311 bytes .../applications/python/images/partition.png | Bin 0 -> 133261 bytes .../python}/images/pipeline_simplified.png | Bin .../python}/images/qaoa-circuit-layers.png | Bin .../python}/images/qaoa-problem-kernel.png | Bin .../python}/images/qft.png | Bin .../applications/python/images/qvplot.png | Bin 0 -> 25105 bytes .../python}/images/shor_circuit.png | Bin .../python/images/statehandle.png | Bin 0 -> 71875 bytes .../python/images/teleportation.png | Bin 0 -> 95088 bytes docs/sphinx/applications/python/krylov.ipynb | 628 +++++++++++++++ .../python}/qaoa.ipynb | 0 .../python}/quantum_fourier_transform.ipynb | 12 +- .../python/quantum_teleportation.ipynb | 396 ++++++++++ .../applications/python/quantum_volume.ipynb | 401 ++++++++++ .../python}/readout_error_mitigation.ipynb | 94 ++- .../python/shors.ipynb} | 57 +- docs/sphinx/applications/python/trotter.ipynb | 288 +++++++ ...nitary_compilation_diffusion_models.ipynb} | 120 +-- .../python}/vqe.ipynb | 2 +- .../applications/python/vqe_advanced.ipynb | 741 ++++++++++++++++++ docs/sphinx/conf.py | 4 +- .../tutorials => }/images/Bloch_sphere.png | Bin docs/sphinx/examples/images/backends.png | Bin 0 -> 85239 bytes .../tutorials => }/images/circuit_pdf.png | Bin docs/sphinx/examples/images/gate-fuse.png | Bin 0 -> 49959 bytes docs/sphinx/examples/images/gatefusion.png | Bin 0 -> 104569 bytes docs/sphinx/examples/images/gates.png | Bin 0 -> 106520 bytes docs/sphinx/examples/images/krylovcircuit.png | Bin 0 
-> 25423 bytes docs/sphinx/examples/images/mqpumgpu.png | Bin 0 -> 96260 bytes docs/sphinx/examples/python/advanced_vqe.py | 72 -- .../examples/python/bernstein_vazirani.py | 81 -- .../examples/python/building_kernels.ipynb | 420 ++++++++++ .../examples/python/building_kernels.py | 262 +++++++ .../examples/python/cuquantum_backends.py | 27 - .../examples/python/executing_kernels.ipynb | 284 +++++++ .../examples/python/expectation_values.py | 23 - .../examples/python/measuring_kernels.ipynb | 116 +++ .../python/multi_controlled_operations.py | 2 +- .../python/noise_amplitude_damping.py | 50 -- docs/sphinx/examples/python/noise_bit_flip.py | 46 -- .../examples/python/noise_depolarization.py | 44 -- .../examples/python/noise_kraus_operator.py | 62 -- .../examples/python/noise_phase_flip.py | 48 -- .../{tutorials => }/noisy_simulations.ipynb | 18 +- docs/sphinx/examples/python/operators.ipynb | 99 +++ .../python/optimizers_gradients.ipynb | 303 +++++++ .../python/performance_optimizations.ipynb | 52 ++ docs/sphinx/examples/python/qaoa_maxcut.py | 78 -- docs/sphinx/examples/python/simple_vqe.py | 47 -- .../examples/python/trotter_kernel_mode.py | 128 --- .../examples/python/tutorials/H2-MRQKS.ipynb | 458 ----------- .../examples/python/tutorials/afqmc.ipynb | 531 ------------- .../python/tutorials/hadamard_test.ipynb | 310 -------- .../maximum_vertex_weight_clique.ipynb | 325 -------- .../python/tutorials/visualization.ipynb | 301 ------- .../tutorials/vqe_water_active_space.ipynb | 235 ------ .../examples/python/visualization.ipynb | 394 ++++++++++ docs/sphinx/index.rst | 2 +- docs/sphinx/specification/cudaq/examples.rst | 6 +- .../cpp/providers => targets/cpp}/ionq.cpp | 0 .../cpp/providers => targets/cpp}/iqm.cpp | 0 .../providers => targets/cpp}/nvqc_qml.cpp | 0 .../providers => targets/cpp}/nvqc_sample.cpp | 0 .../providers => targets/cpp}/nvqc_state.cpp | 0 .../providers => targets/cpp}/nvqc_vqe.cpp | 0 .../cpp/providers => targets/cpp}/orca.cpp | 0 .../providers 
=> targets/cpp}/orca_mqpu.cpp | 0 .../providers => targets/cpp}/photonics.cpp | 0 .../cpp}/photonics_tbi.cpp | 0 .../providers => targets/cpp}/quantinuum.cpp | 0 .../providers => targets/python}/ionq.py | 0 .../providers => targets/python}/iqm.py | 0 .../providers => targets/python}/nvqc_mgpu.py | 0 .../python}/nvqc_sample.py | 0 .../python}/nvqc_state.py | 0 .../providers => targets/python}/nvqc_vqe.py | 0 .../providers => targets/python}/oqc.py | 0 .../providers => targets/python}/orca.py | 0 .../providers => targets/python}/orca_mqpu.py | 0 .../providers => targets/python}/photonics.py | 0 .../python}/photonics_tbi.py | 0 .../python}/quantinuum.py | 0 docs/sphinx/using/applications.rst | 28 + .../using/examples/bernstein_vazirani.rst | 23 - .../using/examples/building_kernels.rst | 9 + docs/sphinx/using/examples/cuquantum.rst | 30 - docs/sphinx/using/examples/examples.rst | 14 +- .../using/examples/expectation_values.rst | 16 - .../using/examples/hardware_providers.rst | 18 +- docs/sphinx/using/examples/multi_control.rst | 8 +- .../using/examples/multi_gpu_workflows.rst | 4 +- .../using/examples/noisy_simulation.rst | 65 -- docs/sphinx/using/examples/qaoa.rst | 22 - docs/sphinx/using/examples/vqe.rst | 29 - docs/sphinx/using/tutorials.rst | 24 - scripts/run_all_spelling.sh | 2 +- scripts/validate_container.sh | 20 +- scripts/validate_wheel.sh | 4 +- 158 files changed, 6439 insertions(+), 3366 deletions(-) rename docs/sphinx/{examples/cpp/algorithms => applications/cpp}/amplitude_estimation.cpp (100%) rename docs/sphinx/{examples/cpp/algorithms => applications/cpp}/bernstein_vazirani.cpp (100%) rename docs/sphinx/{examples/cpp/algorithms => applications/cpp}/grover.cpp (100%) rename docs/sphinx/{examples/cpp/other => applications/cpp}/iterative_qpe.cpp (100%) rename docs/sphinx/{examples/cpp/algorithms => applications/cpp}/phase_estimation.cpp (100%) rename docs/sphinx/{examples/cpp/algorithms => applications/cpp}/qaoa_maxcut.cpp (100%) rename 
docs/sphinx/{examples/cpp/other => applications/cpp}/random_walk_qpe.cpp (100%) rename docs/sphinx/{examples/cpp/other => applications/cpp}/trotter_kernel_mode.cpp (100%) rename docs/sphinx/{examples/cpp/algorithms => applications/cpp}/vqe_h2.cpp (100%) create mode 100644 docs/sphinx/applications/python/afqmc.ipynb rename docs/sphinx/{examples/python/tutorials => applications/python}/afqmc_src/geo_fenta.xyz (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/afqmc_src/geo_o3.xyz (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/afqmc_src/utils_ipie.py (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/afqmc_src/vqe_cudaq_qnp.py (100%) create mode 100644 docs/sphinx/applications/python/bernstein_vazirani.ipynb rename docs/sphinx/{examples/python/tutorials => applications/python}/cost_minimization.ipynb (98%) rename docs/sphinx/{examples/python/tutorials => applications/python}/deutschs_algorithm.ipynb (98%) create mode 100644 docs/sphinx/applications/python/digitized_counterdiabatic_qaoa.ipynb rename docs/sphinx/{examples/python/tutorials/Divisive_clustering.ipynb => applications/python/divisive_clustering_coresets.ipynb} (99%) rename docs/sphinx/{examples/python/tutorials => applications/python}/divisive_clustering_src/divisive_clustering.py (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/divisive_clustering_src/main_divisive_clustering.py (100%) create mode 100644 docs/sphinx/applications/python/hadamard_test.ipynb rename docs/sphinx/{examples/python/tutorials => applications/python}/hybrid_qnns.ipynb (99%) create mode 100644 docs/sphinx/applications/python/images/QKSD.png create mode 100644 docs/sphinx/applications/python/images/QVprocedure.png create mode 100644 docs/sphinx/applications/python/images/VQE.png rename docs/sphinx/{examples/python/tutorials => applications/python}/images/automatic_batching.png (100%) create mode 100644 
docs/sphinx/applications/python/images/cas.png rename docs/sphinx/{examples/python/tutorials => applications/python}/images/circsplit.png (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/images/circuit_encoding.png (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/images/deutsch.png (100%) create mode 100644 docs/sphinx/applications/python/images/docking.png create mode 100644 docs/sphinx/applications/python/images/gate-fuse.png rename docs/sphinx/{examples/python/tutorials => applications/python}/images/hsplit.png (100%) create mode 100644 docs/sphinx/applications/python/images/htest.png create mode 100644 docs/sphinx/applications/python/images/htestfactored.png rename docs/sphinx/{examples/python/tutorials => applications/python}/images/hybrid.png (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/images/inverse_qft.png (100%) create mode 100644 docs/sphinx/applications/python/images/kakdecomp.png create mode 100644 docs/sphinx/applications/python/images/krylovcircuit.png rename docs/sphinx/{examples/python/tutorials => applications/python}/images/max-cut-illustration.png (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/images/oracle.png (100%) create mode 100644 docs/sphinx/applications/python/images/parametershift.png create mode 100644 docs/sphinx/applications/python/images/partition.png rename docs/sphinx/{examples/python/tutorials => applications/python}/images/pipeline_simplified.png (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/images/qaoa-circuit-layers.png (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/images/qaoa-problem-kernel.png (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/images/qft.png (100%) create mode 100644 docs/sphinx/applications/python/images/qvplot.png rename docs/sphinx/{examples/python/tutorials => 
applications/python}/images/shor_circuit.png (100%) create mode 100644 docs/sphinx/applications/python/images/statehandle.png create mode 100644 docs/sphinx/applications/python/images/teleportation.png create mode 100644 docs/sphinx/applications/python/krylov.ipynb rename docs/sphinx/{examples/python/tutorials => applications/python}/qaoa.ipynb (100%) rename docs/sphinx/{examples/python/tutorials => applications/python}/quantum_fourier_transform.ipynb (98%) create mode 100644 docs/sphinx/applications/python/quantum_teleportation.ipynb create mode 100644 docs/sphinx/applications/python/quantum_volume.ipynb rename docs/sphinx/{examples/python/tutorials => applications/python}/readout_error_mitigation.ipynb (98%) rename docs/sphinx/{examples/python/tutorials/Shors.ipynb => applications/python/shors.ipynb} (97%) create mode 100644 docs/sphinx/applications/python/trotter.ipynb rename docs/sphinx/{examples/python/tutorials/unitary_compilation.ipynb => applications/python/unitary_compilation_diffusion_models.ipynb} (95%) rename docs/sphinx/{examples/python/tutorials => applications/python}/vqe.ipynb (99%) create mode 100644 docs/sphinx/applications/python/vqe_advanced.ipynb rename docs/sphinx/examples/{python/tutorials => }/images/Bloch_sphere.png (100%) create mode 100644 docs/sphinx/examples/images/backends.png rename docs/sphinx/examples/{python/tutorials => }/images/circuit_pdf.png (100%) create mode 100644 docs/sphinx/examples/images/gate-fuse.png create mode 100644 docs/sphinx/examples/images/gatefusion.png create mode 100644 docs/sphinx/examples/images/gates.png create mode 100644 docs/sphinx/examples/images/krylovcircuit.png create mode 100644 docs/sphinx/examples/images/mqpumgpu.png delete mode 100644 docs/sphinx/examples/python/advanced_vqe.py delete mode 100644 docs/sphinx/examples/python/bernstein_vazirani.py create mode 100644 docs/sphinx/examples/python/building_kernels.ipynb create mode 100644 docs/sphinx/examples/python/building_kernels.py delete mode 
100644 docs/sphinx/examples/python/cuquantum_backends.py create mode 100644 docs/sphinx/examples/python/executing_kernels.ipynb delete mode 100644 docs/sphinx/examples/python/expectation_values.py create mode 100644 docs/sphinx/examples/python/measuring_kernels.ipynb delete mode 100644 docs/sphinx/examples/python/noise_amplitude_damping.py delete mode 100644 docs/sphinx/examples/python/noise_bit_flip.py delete mode 100644 docs/sphinx/examples/python/noise_depolarization.py delete mode 100644 docs/sphinx/examples/python/noise_kraus_operator.py delete mode 100644 docs/sphinx/examples/python/noise_phase_flip.py rename docs/sphinx/examples/python/{tutorials => }/noisy_simulations.ipynb (91%) create mode 100644 docs/sphinx/examples/python/operators.ipynb create mode 100644 docs/sphinx/examples/python/optimizers_gradients.ipynb create mode 100644 docs/sphinx/examples/python/performance_optimizations.ipynb delete mode 100644 docs/sphinx/examples/python/qaoa_maxcut.py delete mode 100644 docs/sphinx/examples/python/simple_vqe.py delete mode 100644 docs/sphinx/examples/python/trotter_kernel_mode.py delete mode 100644 docs/sphinx/examples/python/tutorials/H2-MRQKS.ipynb delete mode 100644 docs/sphinx/examples/python/tutorials/afqmc.ipynb delete mode 100644 docs/sphinx/examples/python/tutorials/hadamard_test.ipynb delete mode 100644 docs/sphinx/examples/python/tutorials/maximum_vertex_weight_clique.ipynb delete mode 100644 docs/sphinx/examples/python/tutorials/visualization.ipynb delete mode 100644 docs/sphinx/examples/python/tutorials/vqe_water_active_space.ipynb create mode 100644 docs/sphinx/examples/python/visualization.ipynb rename docs/sphinx/{examples/cpp/providers => targets/cpp}/ionq.cpp (100%) rename docs/sphinx/{examples/cpp/providers => targets/cpp}/iqm.cpp (100%) rename docs/sphinx/{examples/cpp/providers => targets/cpp}/nvqc_qml.cpp (100%) rename docs/sphinx/{examples/cpp/providers => targets/cpp}/nvqc_sample.cpp (100%) rename docs/sphinx/{examples/cpp/providers 
=> targets/cpp}/nvqc_state.cpp (100%) rename docs/sphinx/{examples/cpp/providers => targets/cpp}/nvqc_vqe.cpp (100%) rename docs/sphinx/{examples/cpp/providers => targets/cpp}/orca.cpp (100%) rename docs/sphinx/{examples/cpp/providers => targets/cpp}/orca_mqpu.cpp (100%) rename docs/sphinx/{examples/cpp/providers => targets/cpp}/photonics.cpp (100%) rename docs/sphinx/{examples/cpp/providers => targets/cpp}/photonics_tbi.cpp (100%) rename docs/sphinx/{examples/cpp/providers => targets/cpp}/quantinuum.cpp (100%) rename docs/sphinx/{examples/python/providers => targets/python}/ionq.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/iqm.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/nvqc_mgpu.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/nvqc_sample.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/nvqc_state.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/nvqc_vqe.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/oqc.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/orca.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/orca_mqpu.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/photonics.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/photonics_tbi.py (100%) rename docs/sphinx/{examples/python/providers => targets/python}/quantinuum.py (100%) create mode 100644 docs/sphinx/using/applications.rst delete mode 100644 docs/sphinx/using/examples/bernstein_vazirani.rst create mode 100644 docs/sphinx/using/examples/building_kernels.rst delete mode 100644 docs/sphinx/using/examples/cuquantum.rst delete mode 100644 docs/sphinx/using/examples/noisy_simulation.rst delete mode 100644 docs/sphinx/using/examples/qaoa.rst delete mode 100644 docs/sphinx/using/examples/vqe.rst delete mode 100644 docs/sphinx/using/tutorials.rst 
diff --git a/.github/workflows/config/spellcheck_config.yml b/.github/workflows/config/spellcheck_config.yml index 24642537f3..9497ef16f5 100644 --- a/.github/workflows/config/spellcheck_config.yml +++ b/.github/workflows/config/spellcheck_config.yml @@ -193,6 +193,8 @@ matrix: - name: cxx_examples sources: - 'docs/sphinx/examples/**/*.cpp' + - 'docs/sphinx/applications/cpp/*.cpp' + - 'docs/sphinx/targets/cpp/*.cpp' expect_match: false aspell: lang: en diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index a3056c056f..e8e8762102 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -487,7 +487,7 @@ jobs: export ORCA_ACCESS_URL='${{ secrets.ORCA_ACCESS_URL }}' set +e # Allow script to keep going through errors test_err_sum=0 - cpp_tests="docs/sphinx/examples/cpp/providers/orca.cpp docs/sphinx/examples/cpp/providers/orca_mqpu.cpp" + cpp_tests="docs/sphinx/targets/cpp/orca.cpp docs/sphinx/targets/cpp/orca_mqpu.cpp" for filename in $cpp_tests; do [ -e "$filename" ] || echo "::error::Couldn't find file ($filename)" nvq++ --target orca --orca-url $ORCA_ACCESS_URL $filename @@ -506,7 +506,7 @@ jobs: test_err_sum=$((test_err_sum+1)) fi done - python_tests="docs/sphinx/examples/python/providers/orca.py docs/sphinx/examples/python/providers/orca_mqpu.py" + python_tests="docs/sphinx/targets/python/orca.py docs/sphinx/targets/python/orca_mqpu.py" for filename in $python_tests; do [ -e "$filename" ] || echo "::error::Couldn't find file ($filename)" python3 $filename 1> /dev/null @@ -633,7 +633,7 @@ jobs: done # Test C++ examples with NVQC - for filename in `find examples/cpp/ -name '*.cpp'`; do + for filename in `find examples/cpp/ applications/cpp/ targets/cpp/ -name '*.cpp'`; do if [[ "$filename" == *"nvqc"* ]]; then echo "$filename" nvqc_config="" @@ -669,7 +669,7 @@ jobs: # Test NVQC Python examples + Python MLIR execution tests (not IR tests) python3 -m pip install pytest - for ex 
in `find examples/python python/tests/mlir/target -name '*.py' -not -path '*/python/tutorials/*'`; do + for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do filename=$(basename -- "$ex") filename="${filename%.*}" echo "Testing $filename:" @@ -786,7 +786,7 @@ set +e # Allow script to keep going through errors python$python_version -m pip install pytest test_err_sum=0 - for ex in `find examples/python python/tests/mlir/target -name '*.py' -not -path '*/python/tutorials/*'`; do + for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do filename=$(basename -- "$ex") filename="${filename%.*}" echo "Testing $filename:" diff --git a/.github/workflows/nvqc_regression_tests.yml b/.github/workflows/nvqc_regression_tests.yml index 857343501c..a39150cfaa 100644 --- a/.github/workflows/nvqc_regression_tests.yml +++ b/.github/workflows/nvqc_regression_tests.yml @@ -154,7 +154,7 @@ jobs: done # Test C++ examples with NVQC - for filename in `find examples/cpp/ -name '*.cpp'`; do + for filename in `find examples/cpp/ applications/cpp/ targets/cpp/ -name '*.cpp'`; do if [[ "$filename" == *"nvqc"* ]]; then echo "$filename" nvqc_config="" @@ -190,7 +190,7 @@ jobs: # Test NVQC Python examples + Python MLIR execution tests (not IR tests) python3 -m pip install pytest - for ex in `find examples/python python/tests/mlir/target -name '*.py' -not -path '*/python/tutorials/*'`; do + for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do filename=$(basename -- "$ex") filename="${filename%.*}" echo "Testing $filename:" @@ -304,7 +304,7 @@ jobs: set +e # Allow script to keep going through errors python$python_version -m pip install pytest test_err_sum=0 - for ex in `find examples/python python/tests/mlir/target -name '*.py' -not -path '*/python/tutorials/*'`; do + for ex in `find examples/python python/tests/mlir/target -name '*.py'`; do filename=$(basename -- "$ex") filename="${filename%.*}" echo "Testing $filename:" diff --git
a/.github/workflows/publishing.yml b/.github/workflows/publishing.yml index 72d23b7062..c0ae887f68 100644 --- a/.github/workflows/publishing.yml +++ b/.github/workflows/publishing.yml @@ -791,7 +791,7 @@ jobs: run: | chmod +x /tmp/install/install_cuda_quantum.* /tmp/install/install_cuda_quantum.* --accept - rm -rf examples && mv docs/sphinx/examples examples && rm -rf examples/python + rm -rf examples applications targets && mv docs/sphinx/examples examples && mv docs/sphinx/applications applications && mv docs/sphinx/targets targets && rm -rf examples/python && rm -rf applications/python && rm -rf targets/python GITHUB_STEP_SUMMARY=$GITHUB_STEP_SUMMARY \ bash -l scripts/validate_container.sh | tee /tmp/validation.out @@ -837,6 +837,8 @@ jobs: # Setup links for validate_wheel.sh script ln -s $GITHUB_WORKSPACE/scripts/validate_wheel.sh . ln -s $GITHUB_WORKSPACE/docs/sphinx/examples/python /tmp/examples + ln -s $GITHUB_WORKSPACE/docs/sphinx/applications/python /tmp/applications + ln -s $GITHUB_WORKSPACE/docs/sphinx/targets/python /tmp/targets ln -s $GITHUB_WORKSPACE/docs/sphinx/snippets/python /tmp/snippets ln -s $GITHUB_WORKSPACE/python/tests /tmp/tests ln -s $GITHUB_WORKSPACE/python/README.md . @@ -943,7 +945,7 @@ jobs: fi; \ done` - rm -rf examples && mv github-repo/docs/sphinx/examples examples + rm -rf examples applications targets && mv github-repo/docs/sphinx/examples examples && mv github-repo/docs/sphinx/applications applications && mv github-repo/docs/sphinx/targets targets mv github-repo/docs/notebook_validation.py . 
GITHUB_STEP_SUMMARY=$GITHUB_STEP_SUMMARY \ bash github-repo/scripts/validate_container.sh $backends_to_test | tee /tmp/validation.out diff --git a/.github/workflows/python_wheels.yml b/.github/workflows/python_wheels.yml index c60ba3baaf..7734356b51 100644 --- a/.github/workflows/python_wheels.yml +++ b/.github/workflows/python_wheels.yml @@ -316,7 +316,7 @@ jobs: docker run --rm -dit --name wheel-validation-examples wheel_validation:local status_sum=0 - for ex in `find docs/sphinx/examples/python -name '*.py' -not -path '*/providers/*' -not -path '*/divisive_clustering_src/*' -not -path '*/utils_ipie.py' -not -path '*/vqe_cudaq_qnp.py'`; do + for ex in `find docs/sphinx/examples/python -name '*.py' -not -path '*/building_kernels.py'`; do file="${ex#docs/sphinx/examples/python/}" echo "__Example ${file}:__" >> /tmp/validation.out (docker exec wheel-validation-examples bash -c "python${{ inputs.python_version }} /tmp/examples/$file" >> /tmp/validation.out) && success=true || success=false diff --git a/.github/workflows/repo_checks.yml b/.github/workflows/repo_checks.yml index d5f4d324c5..358bba19ec 100644 --- a/.github/workflows/repo_checks.yml +++ b/.github/workflows/repo_checks.yml @@ -85,7 +85,7 @@ jobs: create_output cxx '*.cpp *.h *.hpp :!:test :!:targettests :!:tpls :!:**/nlopt-src/*' create_output cxx_headers '*.h *.hpp :!:test :!:targettests :!:tpls :!:**/nlopt-src/*' - create_output cxx_examples 'docs/sphinx/examples/**/*.cpp' + create_output cxx_examples 'docs/sphinx/examples/**/*.cpp' 'docs/sphinx/applications/cpp/*.cpp' 'docs/sphinx/targets/cpp/*.cpp' create_output python '*.py :!:python/tests :!:test :!:targettests :!:tpls :!:docs/sphinx/conf.py' create_output markdown '*.md :!:tpls' create_output rst '*.rst :!:tpls :!:docs/sphinx/_templates/**/*.rst' diff --git a/.licenserc.yaml b/.licenserc.yaml index 753103c272..62c5b8fc28 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -32,7 +32,9 @@ header: - 'examples' - 'tpls' - 'docs/sphinx/**/*.html' + - 
'docs/sphinx/applications' - 'docs/sphinx/examples' + - 'docs/sphinx/targets' - 'docs/sphinx/_templates' - 'docs/sphinx/_static/cuda_quantum_icon.svg' - 'docker/test/installer/mpi_cuda_check.cpp' diff --git a/Developing.md b/Developing.md index e9ab1206eb..4f0e868ac5 100644 --- a/Developing.md +++ b/Developing.md @@ -28,7 +28,7 @@ line printed when you build the code and run an example using the command ```bash bash "$CUDAQ_REPO_ROOT/scripts/build_cudaq.sh" && \ -nvq++ "$CUDAQ_REPO_ROOT/docs/sphinx/examples/cpp/algorithms/grover.cpp" -o grover.out && \ +nvq++ "$CUDAQ_REPO_ROOT/docs/sphinx/applications/cpp/grover.cpp" -o grover.out && \ ./grover.out ``` @@ -36,18 +36,18 @@ When working on compiler internals, it can be useful to look at intermediate representations for CUDA-Q kernels. To see how the kernels in [this -example](./docs/sphinx/examples/cpp/algorithms/grover.cpp) are translated, you +example](./docs/sphinx/applications/cpp/grover.cpp) are translated, you can run ```bash -cudaq-quake $CUDAQ_REPO_ROOT/docs/sphinx/examples/cpp/algorithms/grover.cpp +cudaq-quake $CUDAQ_REPO_ROOT/docs/sphinx/applications/cpp/grover.cpp ``` to see its representation in the Quake MLIR dialect. 
To see its translation to [QIR](https://www.qir-alliance.org/), you can run ```bash -cudaq-quake $CUDAQ_REPO_ROOT/docs/sphinx/examples/cpp/algorithms/grover.cpp | +cudaq-quake $CUDAQ_REPO_ROOT/docs/sphinx/applications/cpp/grover.cpp | cudaq-opt --canonicalize --add-dealloc | quake-translate --convert-to=qir ``` diff --git a/docker/build/assets.Dockerfile b/docker/build/assets.Dockerfile index 73e20627ad..02e487996d 100644 --- a/docker/build/assets.Dockerfile +++ b/docker/build/assets.Dockerfile @@ -92,6 +92,8 @@ FROM prereqs AS cpp_build ADD "cmake" /cuda-quantum/cmake ADD "docs/CMakeLists.txt" /cuda-quantum/docs/CMakeLists.txt ADD "docs/sphinx/examples" /cuda-quantum/docs/sphinx/examples +ADD "docs/sphinx/applications" /cuda-quantum/docs/sphinx/applications +ADD "docs/sphinx/targets" /cuda-quantum/docs/sphinx/targets ADD "docs/sphinx/snippets" /cuda-quantum/docs/sphinx/snippets ADD "include" /cuda-quantum/include ADD "lib" /cuda-quantum/lib diff --git a/docker/release/cudaq.Dockerfile b/docker/release/cudaq.Dockerfile index 5964df396d..187fe19e5e 100644 --- a/docker/release/cudaq.Dockerfile +++ b/docker/release/cudaq.Dockerfile @@ -117,9 +117,10 @@ RUN adduser --disabled-password --gecos '' cudaq && adduser cudaq sudo \ ENV PATH="$PATH:/home/cudaq/.local/bin" ADD ./docs/sphinx/examples/ /home/cudaq/examples/ +ADD ./docs/sphinx/applications/ /home/cudaq/applications/ +ADD ./docs/sphinx/targets/ /home/cudaq/targets/ ADD ./docker/release/README.md /home/cudaq/README.md -RUN mv /home/cudaq/examples/python/tutorials /home/cudaq/tutorials \ - && chown -R cudaq /home/cudaq && chgrp -R cudaq /home/cudaq +RUN chown -R cudaq /home/cudaq && chgrp -R cudaq /home/cudaq USER cudaq WORKDIR /home/cudaq diff --git a/docker/test/installer/linux.Dockerfile b/docker/test/installer/linux.Dockerfile index 288974fa0e..7ed0a575a9 100644 --- a/docker/test/installer/linux.Dockerfile +++ b/docker/test/installer/linux.Dockerfile @@ -75,6 +75,8 @@ ADD scripts/validate_container.sh 
/home/cudaq/validate.sh ADD scripts/configure_build.sh /home/cudaq/configure_build.sh ADD docker/test/installer/mpi_cuda_check.cpp /home/cudaq/mpi_cuda_check.cpp ADD docs/sphinx/examples/cpp /home/cudaq/examples +ADD docs/sphinx/applications/cpp /home/cudaq/applications +ADD docs/sphinx/targets/cpp /home/cudaq/targets # Wheel to check side-by-side installation of Python and C++ support ARG cuda_quantum_wheel='cuda_quantum_*.whl' diff --git a/docker/test/wheels/debian.Dockerfile b/docker/test/wheels/debian.Dockerfile index f1e5d0f11e..7afe0ce0ea 100644 --- a/docker/test/wheels/debian.Dockerfile +++ b/docker/test/wheels/debian.Dockerfile @@ -31,6 +31,8 @@ ARG optional_dependencies= ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp311-cp311-manylinux_2_28_x86_64.whl COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel COPY docs/sphinx/examples/python /tmp/examples/ +COPY docs/sphinx/applications/python /tmp/applications/ +COPY docs/sphinx/targets/python /tmp/targets/ COPY docs/sphinx/snippets/python /tmp/snippets/ COPY python/tests /tmp/tests/ COPY python/README.md /tmp/README.md diff --git a/docker/test/wheels/fedora.Dockerfile b/docker/test/wheels/fedora.Dockerfile index 62a27a19d1..683d5ff14e 100644 --- a/docker/test/wheels/fedora.Dockerfile +++ b/docker/test/wheels/fedora.Dockerfile @@ -29,6 +29,8 @@ ARG optional_dependencies= ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp310-cp310-manylinux_2_28_x86_64.whl COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel COPY docs/sphinx/examples/python /tmp/examples/ +COPY docs/sphinx/applications/python /tmp/applications/ +COPY docs/sphinx/targets/python /tmp/targets/ COPY docs/sphinx/snippets/python /tmp/snippets/ COPY python/tests /tmp/tests/ COPY python/README.md /tmp/README.md diff --git a/docker/test/wheels/opensuse.Dockerfile b/docker/test/wheels/opensuse.Dockerfile index a125bb6913..cf7bae7819 100644 --- a/docker/test/wheels/opensuse.Dockerfile +++ b/docker/test/wheels/opensuse.Dockerfile @@ -26,6 +26,8 @@ ARG 
optional_dependencies= ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp39-cp39-manylinux_2_28_x86_64.whl COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel COPY docs/sphinx/examples/python /tmp/examples/ +COPY docs/sphinx/applications/python /tmp/applications/ +COPY docs/sphinx/targets/python /tmp/targets/ COPY docs/sphinx/snippets/python /tmp/snippets/ COPY python/tests /tmp/tests/ COPY python/README.md /tmp/README.md diff --git a/docker/test/wheels/redhat.Dockerfile b/docker/test/wheels/redhat.Dockerfile index 3492eaac82..561dff31fc 100644 --- a/docker/test/wheels/redhat.Dockerfile +++ b/docker/test/wheels/redhat.Dockerfile @@ -25,6 +25,8 @@ ARG optional_dependencies= ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp311-cp311-manylinux_2_28_x86_64.whl COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel COPY docs/sphinx/examples/python /tmp/examples/ +COPY docs/sphinx/applications/python /tmp/applications/ +COPY docs/sphinx/targets/python /tmp/targets/ COPY docs/sphinx/snippets/python /tmp/snippets/ COPY python/tests /tmp/tests/ COPY python/README.md /tmp/README.md diff --git a/docker/test/wheels/ubuntu.Dockerfile b/docker/test/wheels/ubuntu.Dockerfile index 2eafea8fc1..779e68a088 100644 --- a/docker/test/wheels/ubuntu.Dockerfile +++ b/docker/test/wheels/ubuntu.Dockerfile @@ -24,6 +24,8 @@ ARG optional_dependencies= ARG cuda_quantum_wheel=cuda_quantum-0.0.0-cp310-cp310-manylinux_2_28_x86_64.whl COPY $cuda_quantum_wheel /tmp/$cuda_quantum_wheel COPY docs/sphinx/examples/python /tmp/examples/ +COPY docs/sphinx/applications/python /tmp/applications/ +COPY docs/sphinx/targets/python /tmp/targets/ COPY docs/sphinx/snippets/python /tmp/snippets/ COPY python/tests /tmp/tests/ COPY python/README.md /tmp/README.md diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt index 06199626e0..4b0b458518 100644 --- a/docs/CMakeLists.txt +++ b/docs/CMakeLists.txt @@ -9,7 +9,7 @@ # Add nvq++ compile + execution test of code examples # Args: # TEST_NAME: name of the test executable. 
Test name is prefixed with "nvqpp" -# SOURCE_LOCATION: location of the source file (relative to 'sphinx/examples/cpp' directory by default) +# SOURCE_LOCATION: location of the source file (relative to 'sphinx' directory by default) # Optional keyword args: # TARGET : name of the target to use # TARGET_OPTION