From b7a7a71d9ab66970e082474775952348b5a0eea4 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Mon, 19 Aug 2024 11:15:16 -0600 Subject: [PATCH 01/23] KokkosKernels: patch #2296 SpAdd handle: delete sort_option getter/setter (#2296) SpAdd handle was originally a copy-paste of the spgemm handle way back in #122, and included get_sort_option() and set_sort_option() from spgemm. But these try to use the member bool sort_option, which doesn't exist. Somehow these functions never produced compile errors until someone tried to call them. Fixes build errors on Sunspot. --- .../kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp index ea9594ca3e2f..8d28309585a7 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_spadd_handle.hpp @@ -102,10 +102,6 @@ class SPADDHandle { */ size_type get_c_nnz() { return this->result_nnz_size; } - void set_sort_option(int option) { this->sort_option = option; } - - int get_sort_option() { return this->sort_option; } - #ifdef KOKKOSKERNELS_ENABLE_TPL_CUSPARSE SpaddCusparseData cusparseData; #endif From fa40a223e62281547331d3f345e29b0da285eaf9 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Fri, 23 Aug 2024 06:30:06 -0700 Subject: [PATCH 02/23] Phalanx: add unit test for kokkos unique token, fix hip issue --- packages/phalanx/test/Kokkos/tKokkos.cpp | 52 +++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/packages/phalanx/test/Kokkos/tKokkos.cpp b/packages/phalanx/test/Kokkos/tKokkos.cpp index cdadac61779d..f7404b7444f1 100644 --- a/packages/phalanx/test/Kokkos/tKokkos.cpp +++ b/packages/phalanx/test/Kokkos/tKokkos.cpp @@ -1,6 +1,6 @@ // @HEADER // ***************************************************************************** -// Phalanx: A 
Partial Differential Equation Field Evaluation +// Phalanx: A Partial Differential Equation Field Evaluation // Kernel for Flexible Management of Complex Dependency Chains // // Copyright 2008 NTESS and the Phalanx contributors. @@ -828,6 +828,8 @@ namespace phalanx_test { #if defined(KOKKOS_ENABLE_CUDA) using DefaultFadLayout = Kokkos::LayoutContiguous; +#elif defined(KOKKOS_ENABLE_HIP) + using DefaultFadLayout = Kokkos::LayoutContiguous; #else using DefaultFadLayout = Kokkos::LayoutContiguous; #endif @@ -969,4 +971,52 @@ namespace phalanx_test { TEST_FLOATING_EQUALITY(mean,mean_gold,tol); TEST_FLOATING_EQUALITY(stddev,stddev_gold,tol); } + + struct Inner { + Kokkos::Experimental::UniqueToken token_; + }; + + struct Outer { + Inner inner_; + }; + + TEUCHOS_UNIT_TEST(kokkos, UniqueToken) + { + Kokkos::print_configuration(out); + + // Set to false for typical unit testing. If set to true, the cuda + // and hip concurrency can be really large on modern hardware and + // can require a lot of memory. It still runs fast but memory + // requirements might cause issues if overloading gpus with + // multiple unit tests. 
+ const bool use_all_concurrency = false; + size_t scratch_space_size = 10; + if (use_all_concurrency) + scratch_space_size = Kokkos::DefaultExecutionSpace().concurrency(); + + Kokkos::Experimental::UniqueToken token(scratch_space_size); + + out << "\nconcurrency = " << Kokkos::DefaultExecutionSpace().concurrency() << std::endl; + out << "UniqueToken.size() = " << token.size() << std::endl; + + if (use_all_concurrency) { + TEST_EQUALITY(Kokkos::DefaultExecutionSpace().concurrency(), token.size()); + } + else { + TEST_EQUALITY(scratch_space_size, token.size()); + } + + const size_t num_elements = token.size()+10; + Outer o; + + Kokkos::View scratch_space("scratch space",token.size()); + Kokkos::parallel_for("unique token",num_elements,KOKKOS_LAMBDA(const int cell){ + Kokkos::Experimental::AcquireUniqueToken lock(o.inner_.token_); + const auto t = lock.value(); + scratch_space(t) = cell; + // printf("cell=%d, t=%u, equal=%u\n",cell,t,unsigned(cell == t)); + }); + } + } From 8f245aa2e27365ad243e622d9f9569871d276989 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Fri, 23 Aug 2024 08:34:35 -0600 Subject: [PATCH 03/23] Phalanx: add some sanity check tests --- packages/phalanx/test/Kokkos/tKokkos.cpp | 66 +++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/packages/phalanx/test/Kokkos/tKokkos.cpp b/packages/phalanx/test/Kokkos/tKokkos.cpp index f7404b7444f1..283705724bc6 100644 --- a/packages/phalanx/test/Kokkos/tKokkos.cpp +++ b/packages/phalanx/test/Kokkos/tKokkos.cpp @@ -1004,7 +1004,7 @@ namespace phalanx_test { TEST_EQUALITY(Kokkos::DefaultExecutionSpace().concurrency(), token.size()); } else { - TEST_EQUALITY(scratch_space_size, token.size()); + TEST_EQUALITY(scratch_space_size, static_cast(token.size())); } const size_t num_elements = token.size()+10; @@ -1019,4 +1019,68 @@ namespace phalanx_test { }); } + TEUCHOS_UNIT_TEST(kokkos, ReduceCheck) + { + constexpr int size = 10; + double gold_sum = 0.0; + Kokkos::View 
parts("parts",size); + auto parts_host = Kokkos::create_mirror_view(parts); + for (int i=0; i < size; ++i) { + parts_host(i) = double(i); + + if (i%2 == 0) + gold_sum += double(i); + } + Kokkos::deep_copy(parts,parts_host); + + double sum = 0.0; + Kokkos::parallel_reduce("sum",10,KOKKOS_LAMBDA(const int i, double& tmp){ + if (i%2 == 0) + tmp += parts(i); + // printf("tmp(%d)=%f \n",i,tmp); + },sum); + out << "sum = " << sum << std::endl; + const double tol = Teuchos::ScalarTraits::eps()*1000.0; + TEST_FLOATING_EQUALITY(sum,gold_sum,tol); + } + + TEUCHOS_UNIT_TEST(kokkos, ScanCheck) + { + constexpr int size = 10; + Kokkos::View parts("parts",size); + auto parts_host = Kokkos::create_mirror_view(parts); + for (int i=0; i < size; ++i) + parts_host(i)=double(i); + Kokkos::deep_copy(parts,parts_host); + + Kokkos::View inclusive_scan("inclusive",size); + Kokkos::View exclusive_scan("exclusive",size); + double result = 0.0; + Kokkos::parallel_scan("sum",10,KOKKOS_LAMBDA(const int i, double& partial_sum, const bool is_final){ + if (is_final) + exclusive_scan(i) = partial_sum; + + partial_sum += parts(i); + + if (is_final) + inclusive_scan(i) += partial_sum; + + // printf("partial_sum(%d)=%f, is_final=%d \n",i,partial_sum,int(is_final)); + },result); + + auto is_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),inclusive_scan); + auto es_host = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(),exclusive_scan); + + for (int i=0; i < size; ++i) + out << "inclusive_scan(" << i << ") = " << is_host(i) << ", parts(" << i << ") = " << parts_host(i) << std::endl; + for (int i=0; i < size; ++i) + out << "exclusive_scan(" << i << ") = " << es_host(i) << ", parts(" << i << ") = " << parts_host(i) << std::endl; + out << "result (exclusive end) = " << result << std::endl; + + const double tol = Teuchos::ScalarTraits::eps()*100.0; + for (int i=0; i < size; ++i) { + TEST_FLOATING_EQUALITY(is_host(i)-es_host(i), parts_host(i), tol); + } + } + } From 
d73817361ec37eca42a185364807f636b98df050 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Fri, 23 Aug 2024 11:48:30 -0600 Subject: [PATCH 04/23] Phalanx: simplify UniqueToken test --- packages/phalanx/test/Kokkos/tKokkos.cpp | 34 +++++++----------------- 1 file changed, 10 insertions(+), 24 deletions(-) diff --git a/packages/phalanx/test/Kokkos/tKokkos.cpp b/packages/phalanx/test/Kokkos/tKokkos.cpp index 283705724bc6..b42e8355e4b3 100644 --- a/packages/phalanx/test/Kokkos/tKokkos.cpp +++ b/packages/phalanx/test/Kokkos/tKokkos.cpp @@ -843,13 +843,13 @@ namespace phalanx_test { static_assert(std::is_same::value,"ERROR: Layout Inconsistency!"); static_assert(std::is_same::value,"ERROR: Layout Inconsistency!"); - std::cout << "\n\nscalar_view_layout = " << PHX::print() << std::endl; - std::cout << "scalar_dev_layout = " << PHX::print() << std::endl; - std::cout << "DefaultDevLayout = " << PHX::print() << "\n" << std::endl; + out << "\n\nscalar_view_layout = " << PHX::print() << std::endl; + out << "scalar_dev_layout = " << PHX::print() << std::endl; + out << "DefaultDevLayout = " << PHX::print() << "\n" << std::endl; - std::cout << "fad_view_layout = " << PHX::print() << std::endl; - std::cout << "fad_dev_layout = " << PHX::print() << std::endl; - std::cout << "DefaultFadLayout = " << PHX::print() << "\n" << std::endl; + out << "fad_view_layout = " << PHX::print() << std::endl; + out << "fad_dev_layout = " << PHX::print() << std::endl; + out << "DefaultFadLayout = " << PHX::print() << "\n" << std::endl; // Tests for assignments from static View to DynRankView Kokkos::View::type,PHX::Device> static_a("static_a",100,8,64); @@ -984,28 +984,14 @@ namespace phalanx_test { { Kokkos::print_configuration(out); - // Set to false for typical unit testing. If set to true, the cuda - // and hip concurrency can be really large on modern hardware and - // can require a lot of memory. 
It still runs fast but memory - // requirements might cause issues if overloading gpus with - // multiple unit tests. - const bool use_all_concurrency = false; - size_t scratch_space_size = 10; - if (use_all_concurrency) - scratch_space_size = Kokkos::DefaultExecutionSpace().concurrency(); + using ExecutionSpace = PHX::exec_space; - Kokkos::Experimental::UniqueToken token(scratch_space_size); + Kokkos::Experimental::UniqueToken token; - out << "\nconcurrency = " << Kokkos::DefaultExecutionSpace().concurrency() << std::endl; + out << "\nExecutionSpace.concurrency() = " << ExecutionSpace().concurrency() << std::endl; out << "UniqueToken.size() = " << token.size() << std::endl; - if (use_all_concurrency) { - TEST_EQUALITY(Kokkos::DefaultExecutionSpace().concurrency(), token.size()); - } - else { - TEST_EQUALITY(scratch_space_size, static_cast(token.size())); - } + TEST_EQUALITY(ExecutionSpace().concurrency(), token.size()); const size_t num_elements = token.size()+10; Outer o; From 8a15777ffaf412f49c51e6efae3ac2e9acd501e0 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Fri, 23 Aug 2024 12:02:50 -0600 Subject: [PATCH 05/23] Phalanx: mark the kokkos acceptance ctest as RUN_SERIAL --- packages/phalanx/test/Kokkos/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/packages/phalanx/test/Kokkos/CMakeLists.txt b/packages/phalanx/test/Kokkos/CMakeLists.txt index a13eaf8b99a6..98b582cd8c2a 100644 --- a/packages/phalanx/test/Kokkos/CMakeLists.txt +++ b/packages/phalanx/test/Kokkos/CMakeLists.txt @@ -1,11 +1,14 @@ TRIBITS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}) TRIBITS_INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR}/../Utilities) +# RUN_SERIAL is added since UniqueToken can require a large amount of +# memory on GPUs. 
TRIBITS_ADD_EXECUTABLE_AND_TEST( tKokkos SOURCES tKokkos.cpp TESTONLYLIBS phalanx_unit_test_main phalanx_test_utilities NUM_MPI_PROCS 1 + RUN_SERIAL ) TRIBITS_ADD_EXECUTABLE_AND_TEST( From 64f5e38caee76a8778ea259b3300a6be7e65d206 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Mon, 26 Aug 2024 10:04:27 -0600 Subject: [PATCH 06/23] IOSS: Fix serialize io issue in timestep query --- .../seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C index d368bd2ff93d..d84be0925268 100644 --- a/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C +++ b/packages/seacas/libraries/ioss/src/exodus/Ioex_DatabaseIO.C @@ -684,6 +684,7 @@ namespace Ioex { { Ioss::SerializeIO serializeIO_(this); m_timestepCount = ex_inquire_int(get_file_pointer(), EX_INQ_TIME); + } // Need to sync timestep count across ranks if parallel... if (isParallel) { auto min_timestep_count = util().global_minmax(m_timestepCount, Ioss::ParallelUtils::DO_MIN); @@ -725,6 +726,7 @@ namespace Ioex { Ioss::Utils::check_set_bool_property(properties, "EXODUS_CALL_GET_ALL_TIMES", call_ex_get_all_times); if (call_ex_get_all_times) { + Ioss::SerializeIO serializeIO_(this); int error = ex_get_all_times(get_file_pointer(), Data(tsteps)); if (error < 0) { Ioex::exodus_error(get_file_pointer(), __LINE__, __func__, __FILE__); @@ -733,8 +735,11 @@ namespace Ioex { // See if the "last_written_time" attribute exists and if it // does, check that it matches the largest time in 'tsteps'. - exists = Ioex::read_last_time_attribute(get_file_pointer(), &last_time); - } + { + Ioss::SerializeIO serializeIO_(this); + exists = Ioex::read_last_time_attribute(get_file_pointer(), &last_time); + } + if (exists && isParallel) { // Assume that if it exists on 1 processor, it exists on // all... 
Sync value among processors since could have a From db5b7066945f0498b60caee9a88427752d9da978 Mon Sep 17 00:00:00 2001 From: Daniel Arndt Date: Mon, 26 Aug 2024 13:35:33 -0400 Subject: [PATCH 07/23] Sacado: Band-aid fix for LayoutContiguous --- packages/sacado/src/Kokkos_LayoutContiguous.hpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/sacado/src/Kokkos_LayoutContiguous.hpp b/packages/sacado/src/Kokkos_LayoutContiguous.hpp index dedc05c78a86..aec933d5cb2c 100644 --- a/packages/sacado/src/Kokkos_LayoutContiguous.hpp +++ b/packages/sacado/src/Kokkos_LayoutContiguous.hpp @@ -73,6 +73,7 @@ struct inner_layout< LayoutContiguous > { } // namespace Kokkos +// FIXME This is evil and needs refactoring urgently. // Make LayoutContiguous equivalent to Layout namespace std { @@ -81,11 +82,16 @@ namespace std { static const bool value = true; }; + template + static constexpr bool is_same_v< Kokkos::LayoutContiguous, Layout> = is_same, Layout>::value; + template struct is_same< Layout, Kokkos::LayoutContiguous > { static const bool value = true; }; + template + static constexpr bool is_same_v< Layout, Kokkos::LayoutContiguous> = is_same, Layout>::value; } #include "impl/Kokkos_ViewMapping.hpp" From 30966e4c3e3223dca6fb350aeafc193769f0066a Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 22:39:59 +0000 Subject: [PATCH 08/23] Bump github/codeql-action from 3.26.3 to 3.26.5 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 3.26.3 to 3.26.5. 
- [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/883d8588e56d1753a8a58c1c86e88976f0c23449...2c779ab0d087cd7fe7b826087247c2c81f27bfa6) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-patch ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 4 ++-- .github/workflows/scorecards.yml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index a6eacc9354d5..4b9a58337ea4 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -62,7 +62,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@883d8588e56d1753a8a58c1c86e88976f0c23449 # v3.26.3 + uses: github/codeql-action/init@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} @@ -85,6 +85,6 @@ jobs: make -j 2 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@883d8588e56d1753a8a58c1c86e88976f0c23449 # v3.26.3 + uses: github/codeql-action/analyze@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index a4b85a7ee90c..1a77e450bd96 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -66,6 +66,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard. 
- name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@883d8588e56d1753a8a58c1c86e88976f0c23449 # v3.26.3 + uses: github/codeql-action/upload-sarif@2c779ab0d087cd7fe7b826087247c2c81f27bfa6 # v3.26.5 with: sarif_file: results.sarif From 2b5b745aacd6b5b2db749a0a313b1949aff6f72a Mon Sep 17 00:00:00 2001 From: Nathan Ellingwood Date: Mon, 26 Aug 2024 17:58:41 -0600 Subject: [PATCH 09/23] sacado: update Kokkos_ViewMapping.hpp location Kokkos compatibility update following https://github.com/kokkos/kokkos/pull/7256 --- packages/sacado/src/KokkosExp_View_Fad_Contiguous.hpp | 4 ++++ packages/sacado/src/Kokkos_LayoutContiguous.hpp | 4 ++++ packages/sacado/src/Kokkos_LayoutNatural.hpp | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/packages/sacado/src/KokkosExp_View_Fad_Contiguous.hpp b/packages/sacado/src/KokkosExp_View_Fad_Contiguous.hpp index 1dcf26543372..c5db121cdb59 100644 --- a/packages/sacado/src/KokkosExp_View_Fad_Contiguous.hpp +++ b/packages/sacado/src/KokkosExp_View_Fad_Contiguous.hpp @@ -148,7 +148,11 @@ namespace Sacado { #include "Sacado_Traits.hpp" #include "Kokkos_Core.hpp" +#if KOKKOS_VERSION >= 40499 +#include "View/Kokkos_ViewMapping.hpp" +#else #include "impl/Kokkos_ViewMapping.hpp" +#endif //---------------------------------------------------------------------------- diff --git a/packages/sacado/src/Kokkos_LayoutContiguous.hpp b/packages/sacado/src/Kokkos_LayoutContiguous.hpp index dedc05c78a86..016e430754a6 100644 --- a/packages/sacado/src/Kokkos_LayoutContiguous.hpp +++ b/packages/sacado/src/Kokkos_LayoutContiguous.hpp @@ -88,7 +88,11 @@ namespace std { } +#if KOKKOS_VERSION >= 40499 +#include "View/Kokkos_ViewMapping.hpp" +#else #include "impl/Kokkos_ViewMapping.hpp" +#endif namespace Kokkos { namespace Impl { diff --git a/packages/sacado/src/Kokkos_LayoutNatural.hpp b/packages/sacado/src/Kokkos_LayoutNatural.hpp index e4e77d023c1c..1a5ae982295f 100644 --- a/packages/sacado/src/Kokkos_LayoutNatural.hpp +++ 
b/packages/sacado/src/Kokkos_LayoutNatural.hpp @@ -79,7 +79,11 @@ namespace std { } +#if KOKKOS_VERSION >= 40499 +#include "View/Kokkos_ViewMapping.hpp" +#else #include "impl/Kokkos_ViewMapping.hpp" +#endif namespace Kokkos { namespace Impl { From 3132245ca214656f620d3f57948fb4af8f90f3be Mon Sep 17 00:00:00 2001 From: mperego Date: Tue, 27 Aug 2024 11:07:58 -0600 Subject: [PATCH 10/23] Intrepid2: fix C++20 deprecated code and potential type mismatch issue (#13394) * Intrepid2: fix Inclusion checks for non-double point types * Intrepid2: changes to avoid warnings with c++20 standard (fixes #12786) --- .../intrepid2/src/Cell/Intrepid2_CellData.hpp | 28 +++++------ .../src/Cell/Intrepid2_CellDataDef.hpp | 48 +++++++++---------- .../src/Cell/Intrepid2_CellTools.hpp | 17 ++++--- .../Cell/Intrepid2_CellToolsDefInclusion.hpp | 29 +++++------ .../Intrepid2_IntegrationToolsDef.hpp | 6 +-- .../src/Shared/Intrepid2_TestUtils.hpp | 4 +- .../intrepid2/src/Shared/Intrepid2_Types.hpp | 16 ++++++- .../intrepid2/src/Shared/Intrepid2_Utils.hpp | 16 +++---- .../new_design/Sacado_Fad_Exp_ViewStorage.hpp | 2 +- 9 files changed, 92 insertions(+), 74 deletions(-) diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellData.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellData.hpp index 863bb0b18402..a4bcad3b089a 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellData.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellData.hpp @@ -337,10 +337,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -348,10 +348,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -360,10 +360,10 @@ template 
template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -371,10 +371,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -382,10 +382,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -393,10 +393,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; /** @@ -404,10 +404,10 @@ template */ template<> struct PointInclusion::key> { - template + template KOKKOS_INLINE_FUNCTION static bool - check(const PointViewType &point, const double threshold); + check(const PointViewType &point, const ScalarType threshold); }; } diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellDataDef.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellDataDef.hpp index 1c7969c51655..6d9070dfda32 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellDataDef.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellDataDef.hpp @@ -826,76 +826,76 @@ refCenterDataStatic_ = { // Point Inclusion - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; return 
(minus_one <= point(0) && point(0) <= plus_one); } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double distance = max( max( -point(0), -point(1) ), point(0) + point(1) - 1.0 ); + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType distance = max( max( -point(0), -point(1) ), point(0) + point(1) - 1.0 ); return distance < threshold; } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: check(const PointViewType &point, - const double threshold) { - const double minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; + const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; return ((minus_one <= point(0) && point(0) <= plus_one) && (minus_one <= point(1) && point(1) <= plus_one)); } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double distance = max( max(-point(0),-point(1)), + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType distance = max( max(-point(0),-point(1)), max(-point(2), point(0) + point(1) + point(2) - 1) ); return distance < threshold; } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; return ((minus_one <= point(0) && point(0) <= plus_one) && (minus_one <= point(1) && point(1) <= plus_one) && (minus_one <= point(2) && point(2) <= plus_one)); } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double minus_one = -1.0 - threshold, plus_one 
= 1.0 + threshold, minus_zero = -threshold; - const double left = minus_one + point(2); - const double right = plus_one - point(2); + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold, minus_zero = -threshold; + const ScalarType left = minus_one + point(2); + const ScalarType right = plus_one - point(2); return ((left <= point(0) && point(0) <= right) && (left <= point(1) && point(1) <= right) && (minus_zero <= point(2) && point(2) <= plus_one)); } - template + template KOKKOS_INLINE_FUNCTION bool PointInclusion::key>:: - check(const PointViewType &point, const double threshold) { - const double minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; - const double distance = max( max( -point(0), -point(1) ), point(0) + point(1) - 1 ); + check(const PointViewType &point, const ScalarType threshold) { + const ScalarType minus_one = -1.0 - threshold, plus_one = 1.0 + threshold; + const ScalarType distance = max( max( -point(0), -point(1) ), point(0) + point(1) - 1 ); return (distance < threshold && (minus_one <= point(2) && point(2) <= plus_one)); } diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp index 8a522a544ad3..9efe77a264f9 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp @@ -1396,11 +1396,13 @@ namespace Intrepid2 { \param threshold [in] - "tightness" of the inclusion test \return true if the point is in the closure of the specified reference cell and false otherwise. 
*/ - template + template static bool - checkPointInclusion( const pointViewType point, + checkPointInclusion( const PointViewType point, const shards::CellTopology cellTopo, - const double thres = threshold() ); + const typename ScalarTraits::scalar_type thres = + threshold::scalar_type>() ); + /** \brief Checks every point for inclusion in the reference cell of a given topology. @@ -1417,7 +1419,8 @@ namespace Intrepid2 { typename InputViewType> static void checkPointwiseInclusion( OutputViewType inCell, const InputViewType points, - const double thresh = threshold()); + const typename ScalarTraits::scalar_type thresh = + threshold::scalar_type>()); @@ -1434,7 +1437,8 @@ namespace Intrepid2 { static void checkPointwiseInclusion( InCellViewType inCell, const PointViewType points, const shards::CellTopology cellTopo, - const double thres = threshold() ); + const typename ScalarTraits::scalar_type thres = + threshold::scalar_type>() ); /** \brief Checks every points for inclusion in physical cells from a cell workset. 
The points can belong to a global set and stored in a rank-2 (P,D) view, @@ -1454,7 +1458,8 @@ namespace Intrepid2 { const Kokkos::DynRankView points, const Kokkos::DynRankView cellWorkset, const shards::CellTopology cellTopo, - const double thres = threshold() ); + const typename ScalarTraits::scalar_type thres = + threshold::scalar_type>() ); // //============================================================================================// diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefInclusion.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefInclusion.hpp index 5e1b091e3638..1d9ecfe94b63 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefInclusion.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellToolsDefInclusion.hpp @@ -34,9 +34,9 @@ namespace Intrepid2 { template bool CellTools:: - checkPointInclusion( const PointViewType point, - const shards::CellTopology cellTopo, - const double threshold) { + checkPointInclusion( const PointViewType point, + const shards::CellTopology cellTopo, + const typename ScalarTraits::scalar_type threshold) { #ifdef HAVE_INTREPID2_DEBUG INTREPID2_TEST_FOR_EXCEPTION( point.rank() != 1, std::invalid_argument, ">>> ERROR (Intrepid2::CellTools::checkPointInclusion): Point must have rank 1. 
"); @@ -94,12 +94,13 @@ namespace Intrepid2 { struct checkPointInclusionFunctor { OutputViewType output_; InputViewType input_; - double threshold_; + using ScalarType = typename ScalarTraits::scalar_type; + ScalarType threshold_; KOKKOS_INLINE_FUNCTION - checkPointInclusionFunctor( OutputViewType output, - const InputViewType input, - const double threshold) + checkPointInclusionFunctor( OutputViewType output, + const InputViewType input, + const ScalarType threshold) : output_(output), input_(input), threshold_(threshold) {} @@ -129,7 +130,7 @@ namespace Intrepid2 { void CellTools:: checkPointwiseInclusion( OutputViewType inCell, const InputViewType points, - const double threshold) { + const typename ScalarTraits::scalar_type threshold) { using FunctorType = checkPointInclusionFunctor; if (points.rank() == 2) { // inCell.rank() == 1 @@ -144,13 +145,13 @@ namespace Intrepid2 { template template + typename InputViewType> void CellTools:: - checkPointwiseInclusion( InCellViewType inCell, - const PointViewType points, - const shards::CellTopology cellTopo, - const double threshold ) { + checkPointwiseInclusion( InCellViewType inCell, + const InputViewType points, + const shards::CellTopology cellTopo, + const typename ScalarTraits::scalar_type threshold ) { #ifdef HAVE_INTREPID2_DEBUG { INTREPID2_TEST_FOR_EXCEPTION( (inCell.rank() != 1) && (inCell.rank() != 2), std::invalid_argument, @@ -218,7 +219,7 @@ namespace Intrepid2 { const Kokkos::DynRankView points, const Kokkos::DynRankView cellWorkset, const shards::CellTopology cellTopo, - const double threshold ) { + const typename ScalarTraits::scalar_type threshold ) { #ifdef HAVE_INTREPID2_DEBUG { const auto key = cellTopo.getBaseKey(); diff --git a/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp b/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp index 380e820c2d71..674630be879b 100644 --- 
a/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp +++ b/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp @@ -123,7 +123,7 @@ namespace Intrepid2 { // prepare for allocation of temporary storage // note: tempStorage goes "backward", starting from the final component, which needs just one entry - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (allocateFadStorage) { fad_size_output_ = dimension_scalar(integralView_); @@ -1063,7 +1063,7 @@ namespace Intrepid2 { // prepare for allocation of temporary storage // note: tempStorage goes "backward", starting from the final component, which needs just one entry - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (allocateFadStorage) { fad_size_output_ = dimension_scalar(integralView_); @@ -2127,7 +2127,7 @@ void IntegrationTools::integrate(Data integrals, { ScalarView componentIntegralView; - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (allocateFadStorage) { auto fad_size_output = dimension_scalar(integrals.getUnderlyingView()); diff --git a/packages/intrepid2/src/Shared/Intrepid2_TestUtils.hpp b/packages/intrepid2/src/Shared/Intrepid2_TestUtils.hpp index 6423eb68d80f..6e56356d86fe 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_TestUtils.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_TestUtils.hpp @@ -203,7 +203,7 @@ namespace Intrepid2 template inline ViewType getView(const std::string &label, DimArgs... 
dims) { - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (!allocateFadStorage) { return ViewType(label,dims...); @@ -218,7 +218,7 @@ namespace Intrepid2 template inline FixedRankViewType< typename RankExpander::value_type, DefaultTestDeviceType > getFixedRankView(const std::string &label, DimArgs... dims) { - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); using value_type = typename RankExpander::value_type; if (!allocateFadStorage) { diff --git a/packages/intrepid2/src/Shared/Intrepid2_Types.hpp b/packages/intrepid2/src/Shared/Intrepid2_Types.hpp index c64ba7bf4238..8afcc665653d 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_Types.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_Types.hpp @@ -65,14 +65,26 @@ namespace Intrepid2 { return epsilon(); } + template + KOKKOS_FORCEINLINE_FUNCTION + ValueType tolerence() { + return 100.0*epsilon(); + } + KOKKOS_FORCEINLINE_FUNCTION double tolerence() { - return 100.0*epsilon(); + return tolerence(); + } + + template + KOKKOS_FORCEINLINE_FUNCTION + ValueType threshold() { + return 10.0*epsilon(); } KOKKOS_FORCEINLINE_FUNCTION double threshold() { - return 10.0*epsilon(); + return threshold(); } /// Define constants diff --git a/packages/intrepid2/src/Shared/Intrepid2_Utils.hpp b/packages/intrepid2/src/Shared/Intrepid2_Utils.hpp index 14ad8483558a..45c5f09816d1 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_Utils.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_Utils.hpp @@ -281,13 +281,13 @@ namespace Intrepid2 { template KOKKOS_FORCEINLINE_FUNCTION constexpr typename - std::enable_if< !std::is_pod::value, typename ScalarTraits::scalar_type >::type + std::enable_if< !(std::is_standard_layout::value && std::is_trivial::value), typename ScalarTraits::scalar_type >::type get_scalar_value(const T& obj) {return 
obj.val();} template KOKKOS_FORCEINLINE_FUNCTION constexpr typename - std::enable_if< std::is_pod::value, typename ScalarTraits::scalar_type >::type + std::enable_if< std::is_standard_layout::value && std::is_trivial::value, typename ScalarTraits::scalar_type >::type get_scalar_value(const T& obj){return obj;} @@ -300,13 +300,13 @@ namespace Intrepid2 { template KOKKOS_INLINE_FUNCTION constexpr typename - std::enable_if< std::is_pod::value, unsigned >::type + std::enable_if< std::is_standard_layout::value && std::is_trivial::value, unsigned >::type dimension_scalar(const Kokkos::DynRankView /* view */) {return 1;} template KOKKOS_INLINE_FUNCTION constexpr typename - std::enable_if< std::is_pod< typename Kokkos::View::value_type >::value, unsigned >::type + std::enable_if< std::is_standard_layout::value_type>::value && std::is_trivial::value_type>::value, unsigned >::type dimension_scalar(const Kokkos::View /*view*/) {return 1;} template @@ -339,7 +339,7 @@ namespace Intrepid2 { using DeviceType = typename ViewType::device_type; using ViewTypeWithLayout = Kokkos::DynRankView; - const bool allocateFadStorage = !std::is_pod::value; + const bool allocateFadStorage = !(std::is_standard_layout::value && std::is_trivial::value); if (!allocateFadStorage) { return ViewTypeWithLayout(label,dims...); @@ -766,7 +766,7 @@ namespace Intrepid2 { template struct NaturalLayoutForType { using layout = - typename std::conditional::value, + typename std::conditional<(std::is_standard_layout::value && std::is_trivial::value), Kokkos::LayoutLeft, // for POD types, use LayoutLeft Kokkos::LayoutNatural >::type; // For FAD types, use LayoutNatural }; @@ -791,7 +791,7 @@ namespace Intrepid2 { template constexpr int getVectorSizeForHierarchicalParallelism() { - return std::is_pod::value ? VECTOR_SIZE : FAD_VECTOR_SIZE; + return (std::is_standard_layout::value && std::is_trivial::value) ? 
VECTOR_SIZE : FAD_VECTOR_SIZE; } /** @@ -803,7 +803,7 @@ namespace Intrepid2 { KOKKOS_INLINE_FUNCTION constexpr unsigned getScalarDimensionForView(const ViewType &view) { - return (std::is_pod::value) ? 0 : get_dimension_scalar(view); + return (std::is_standard_layout::value && std::is_trivial::value) ? 0 : get_dimension_scalar(view); } } // end namespace Intrepid2 diff --git a/packages/sacado/src/new_design/Sacado_Fad_Exp_ViewStorage.hpp b/packages/sacado/src/new_design/Sacado_Fad_Exp_ViewStorage.hpp index 184eea7bae11..3b67f97d8cf3 100644 --- a/packages/sacado/src/new_design/Sacado_Fad_Exp_ViewStorage.hpp +++ b/packages/sacado/src/new_design/Sacado_Fad_Exp_ViewStorage.hpp @@ -79,7 +79,7 @@ namespace Sacado { //! Constructor SACADO_INLINE_FUNCTION ViewStorage(T* v, const int arg_size = 0, const int arg_stride = 0) : - sz_(arg_size), stride_(arg_stride), val_(v+sz_.value*stride_.value), dx_(v) {} + sz_(arg_size), stride_(arg_stride), val_(v+sz_.value*static_cast(stride_.value)), dx_(v) {} //! Constructor SACADO_INLINE_FUNCTION From fc7cbe9a9f85042b10deb799c027bff0f62469c2 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Tue, 27 Aug 2024 11:17:10 -0600 Subject: [PATCH 11/23] RTOp: fix for c++20 --- packages/rtop/src/support/RTOpPack_SPMD_apply_op_decl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/rtop/src/support/RTOpPack_SPMD_apply_op_decl.hpp b/packages/rtop/src/support/RTOpPack_SPMD_apply_op_decl.hpp index 3e23800b14d4..07bd6071ab84 100644 --- a/packages/rtop/src/support/RTOpPack_SPMD_apply_op_decl.hpp +++ b/packages/rtop/src/support/RTOpPack_SPMD_apply_op_decl.hpp @@ -156,7 +156,7 @@ class ReductTargetReductionOp Teuchos::RCP > op_; // Not defined and not to be called! 
ReductTargetReductionOp(); - ReductTargetReductionOp(const ReductTargetReductionOp&); + ReductTargetReductionOp(const ReductTargetReductionOp&); ReductTargetReductionOp& operator=(const ReductTargetReductionOp&); }; From 9c6525bf849f4e0a9ebfadda2e5dd7418c78ae45 Mon Sep 17 00:00:00 2001 From: Nate Roberts Date: Tue, 27 Aug 2024 12:50:25 -0500 Subject: [PATCH 12/23] Intrepid2: add support for weighted gradients in sum-factorized assembly (#13391) Intrepid2: This PR adds support for additional use cases to the existing sum-factorized assembly mechanism. On the way, it adds support for vector weights to `TransformedBasisValues`; previously, only matrix and scalar weights were supported. This PR also makes some minor adjustments to the timings performed in `StructuredIntegrationPerformance.cpp` to ensure fair comparisons. The new use cases are demonstrated most directly in the following: ``` packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStructuredAssembly.hpp ``` which demonstrates performing an integral of the form $`({\mathbf a} \cdot \nabla e_i, {\mathbf b} \cdot \nabla e_j)`$ for an $H^1$ basis. ## Testing The new use cases are well-exercised by the tests; a new set of `VectorWeightedPoisson` test cases (corresponding to the integral above) are included in the structured-versus-standard tests. ## Other notes/details: - Added support for dot products to Intrepid2_Data; added a corresponding test. Also hardened the test MatVec_CPDD_transpose to include a check that u' A v = v' A' u for vectors u, v. - Added free function rank() taking BasisValues object as argument. - Data: fixed an issue in allocateMatVec in which an incorrect variation type and/or incorrect extent could be used for the final result dimension. - TransformedBasisValues: added support for a (C,P,D) transform, with the main use case being a dot product with a vector-valued basis evaluation. 
- In allocateMatMatResult(), fixed an issue in which the wrong getUnderlyingView() method was being called; resolved by calling the one that gets a DynRankView. - Added a test against taking the outer product of two vectors; fixed the issue that this demonstrated in Intrepid2::Data. - Added test template StructuredVersusStandardVectorWeighted_D2_P1_P1. - Fixed an issue with the transpose arguments to a mat-mat call. - In setJacobianDetInv(), corrected argument name in method declaration and doxygen. - In DataTools, broadened the use cases for multiplyByCPWeights(), and added a transposeMatrix() method. - In TransformedBasisValues, fixed some issues with spaceDim(). - In StandardAssembly and StructuredAssembly, revised to support more vector-weighted use cases. - Added more vector-weighted tests, covering cases when a vector field is dotted with a vector and then integrated against a scalar. - Modified standard assembly performance tests to exclude the orientation application from "core integration" timing. - Added VectorWeightedPoisson to allFormulationChoices. For now, setting the "best" CUDA choices to match Poisson. But we need to redo those calibrations regardless, and maybe switch to reading them in from file: these are for older CUDA cards. 
--- .../GRADGRADStandardAssembly.hpp | 3 + .../assembly-examples/H1StandardAssembly.hpp | 6 + .../HCURLStandardAssembly.hpp | 6 + .../HDIVStandardAssembly.hpp | 7 +- .../HVOLStandardAssembly.hpp | 3 + .../assembly-examples/StandardAssembly.hpp | 228 +++++++-- .../assembly-examples/StructuredAssembly.hpp | 78 +++- ...VectorWeightedGRADGRADStandardAssembly.hpp | 205 ++++++++ ...ctorWeightedGRADGRADStructuredAssembly.hpp | 187 ++++++++ .../src/Cell/Intrepid2_CellTools.hpp | 6 +- .../Basis/Intrepid2_BasisValues.hpp | 36 +- .../Intrepid2_IntegrationToolsDef.hpp | 279 ++++++++--- .../intrepid2/src/Shared/Intrepid2_Data.hpp | 107 ++++- .../src/Shared/Intrepid2_DataTools.hpp | 69 ++- .../Intrepid2_TransformedBasisValues.hpp | 85 +++- .../MonolithicExecutable/DataTests.cpp | 147 ++++++ ...rationTests_GeneralStandardIntegration.cpp | 31 +- ...tionTests_GeneralStructuredIntegration.cpp | 31 +- ...IntegrationTests_QuadratureUniformMesh.cpp | 347 +++++++------- ...egrationTests_StructuredVersusStandard.cpp | 441 +++++++++++++++++- .../StructuredIntegrationTests_TagDefs.hpp | 7 +- .../StructuredIntegrationTests_Utils.hpp | 22 +- .../TransformedBasisValuesTests.cpp | 223 ++++++++- .../StructuredIntegrationPerformance.cpp | 182 ++++++-- 24 files changed, 2362 insertions(+), 374 deletions(-) create mode 100644 packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStandardAssembly.hpp create mode 100644 packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStructuredAssembly.hpp diff --git a/packages/intrepid2/assembly-examples/GRADGRADStandardAssembly.hpp b/packages/intrepid2/assembly-examples/GRADGRADStandardAssembly.hpp index 099893c6facd..8fc7852e7650 100644 --- a/packages/intrepid2/assembly-examples/GRADGRADStandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/GRADGRADStandardAssembly.hpp @@ -145,8 +145,11 @@ Intrepid2::ScalarView performStandardQuadratureGRADGRAD(Intre // because structured integration performs transformations within integrate(), to get a 
fairer comparison here we include the transformation calls. fstIntegrateCall->start(); FunctionSpaceTools::HGRADtransformGRAD(unorientedTransformedGradValues, jacobianInverse, basisGradValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedGradValues, unorientedTransformedGradValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. FunctionSpaceTools::multiplyMeasure(transformedWeightedGradValues, cellMeasures, transformedGradValues); diff --git a/packages/intrepid2/assembly-examples/H1StandardAssembly.hpp b/packages/intrepid2/assembly-examples/H1StandardAssembly.hpp index 455be4e39471..21fb9207ef0f 100644 --- a/packages/intrepid2/assembly-examples/H1StandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/H1StandardAssembly.hpp @@ -151,8 +151,11 @@ Intrepid2::ScalarView performStandardQuadratureH1(Intrepid2:: // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. fstIntegrateCall->start(); FunctionSpaceTools::HGRADtransformGRAD(unorientedTransformedGradValues, jacobianInverse, basisGradValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedGradValues, unorientedTransformedGradValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. 
FunctionSpaceTools::multiplyMeasure(transformedWeightedGradValues, cellMeasures, transformedGradValues); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim); // multiply each entry of transformedGradValues: one flop for each. @@ -163,8 +166,11 @@ Intrepid2::ScalarView performStandardQuadratureH1(Intrepid2:: ExecutionSpace().fence(); FunctionSpaceTools::HGRADtransformVALUE(unorientedTransformedBasisValues, basisValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedBasisValues, unorientedTransformedBasisValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); FunctionSpaceTools::multiplyMeasure(transformedWeightedBasisValues, cellMeasures, transformedBasisValues); bool sumInto = true; // add the (value,value) integral to the (grad,grad) that we've already integrated FunctionSpaceTools::integrate(cellStiffnessSubview, transformedBasisValues, transformedWeightedBasisValues, sumInto); diff --git a/packages/intrepid2/assembly-examples/HCURLStandardAssembly.hpp b/packages/intrepid2/assembly-examples/HCURLStandardAssembly.hpp index 17724153fcf5..a29c80bdbb2c 100644 --- a/packages/intrepid2/assembly-examples/HCURLStandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/HCURLStandardAssembly.hpp @@ -175,8 +175,11 @@ Intrepid2::ScalarView performStandardQuadratureHCURL(Intrepid // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. 
fstIntegrateCall->start(); FunctionSpaceTools::HCURLtransformCURL(unorientedTransformedCurlValues, jacobian, jacobianDeterminant, basisCurlValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedCurlValues, unorientedTransformedCurlValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. FunctionSpaceTools::multiplyMeasure(transformedWeightedCurlValues, cellMeasures, transformedCurlValues); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim); // multiply each entry of transformedCurlValues: one flop for each. @@ -186,8 +189,11 @@ Intrepid2::ScalarView performStandardQuadratureHCURL(Intrepid FunctionSpaceTools::integrate(cellStiffnessSubview, transformedCurlValues, transformedWeightedCurlValues); FunctionSpaceTools::HCURLtransformVALUE(unorientedTransformedBasisValues, jacobianInverse, basisValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedBasisValues, unorientedTransformedBasisValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); FunctionSpaceTools::multiplyMeasure(transformedWeightedBasisValues, cellMeasures, transformedBasisValues); bool sumInto = true; // add the (value,value) integral to the (curl,curl) that we've already integrated FunctionSpaceTools::integrate(cellStiffnessSubview, transformedBasisValues, transformedWeightedBasisValues, sumInto); diff --git a/packages/intrepid2/assembly-examples/HDIVStandardAssembly.hpp 
b/packages/intrepid2/assembly-examples/HDIVStandardAssembly.hpp index 04f415c88afc..2e50d065a732 100644 --- a/packages/intrepid2/assembly-examples/HDIVStandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/HDIVStandardAssembly.hpp @@ -151,8 +151,11 @@ Intrepid2::ScalarView performStandardQuadratureHDIV(Intrepid2 // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. fstIntegrateCall->start(); FunctionSpaceTools::HDIVtransformDIV(unorientedTransformedDivValues, jacobianDeterminant, basisDivValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedDivValues, unorientedTransformedDivValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. FunctionSpaceTools::multiplyMeasure(transformedWeightedDivValues, cellMeasures, transformedDivValues); transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim); // multiply each entry of transformedDivValues: one flop for each. 
@@ -161,10 +164,12 @@ Intrepid2::ScalarView performStandardQuadratureHDIV(Intrepid2 FunctionSpaceTools::integrate(cellStiffnessSubview, transformedDivValues, transformedWeightedDivValues); ExecutionSpace().fence(); - FunctionSpaceTools::HDIVtransformVALUE(unorientedTransformedBasisValues, jacobian, jacobianDeterminant, basisValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedBasisValues, unorientedTransformedBasisValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); FunctionSpaceTools::multiplyMeasure(transformedWeightedBasisValues, cellMeasures, transformedBasisValues); bool sumInto = true; // add the (value,value) integral to the (div,div) that we've already integrated FunctionSpaceTools::integrate(cellStiffnessSubview, transformedBasisValues, transformedWeightedBasisValues, sumInto); diff --git a/packages/intrepid2/assembly-examples/HVOLStandardAssembly.hpp b/packages/intrepid2/assembly-examples/HVOLStandardAssembly.hpp index 723b8f236698..e4729ec5e538 100644 --- a/packages/intrepid2/assembly-examples/HVOLStandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/HVOLStandardAssembly.hpp @@ -139,8 +139,11 @@ Intrepid2::ScalarView performStandardQuadratureHVOL(Intrepid2 auto cellStiffnessSubview = Kokkos::subview(cellStiffness, cellRange, Kokkos::ALL(), Kokkos::ALL()); FunctionSpaceTools::HVOLtransformVALUE(unorientedTransformedBasisValues, jacobianDeterminant, basisValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); OrientationTools::modifyBasisByOrientation(transformedBasisValues, unorientedTransformedBasisValues, orientationsWorkset, basis.get()); + fstIntegrateCall->start(); FunctionSpaceTools::multiplyMeasure(transformedWeightedBasisValues, cellMeasures, transformedBasisValues); 
bool sumInto = true; // add the (value,value) integral to the (curl,curl) that we've already integrated FunctionSpaceTools::integrate(cellStiffnessSubview, transformedBasisValues, transformedWeightedBasisValues, sumInto); diff --git a/packages/intrepid2/assembly-examples/StandardAssembly.hpp b/packages/intrepid2/assembly-examples/StandardAssembly.hpp index a689306dfbf6..610918e7298d 100644 --- a/packages/intrepid2/assembly-examples/StandardAssembly.hpp +++ b/packages/intrepid2/assembly-examples/StandardAssembly.hpp @@ -110,10 +110,10 @@ namespace { } //! General assembly for two arbitrary bases and ops that uses the classic, generic Intrepid2 paths. -template +template // spaceDim and spaceDim2 should agree on value (differ on type) Intrepid2::ScalarView performStandardAssembly(Intrepid2::CellGeometry &geometry, int worksetSize, - const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, - const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, + const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, Teuchos::RCP< Kokkos::Array > vectorWeight1, + const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, Teuchos::RCP< Kokkos::Array > vectorWeight2, double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) { using ExecutionSpace = typename DeviceType::execution_space; @@ -170,32 +170,72 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell ViewType basis1Values = basis1->allocateOutputView(numPoints, op1); // (F1,P[,D]) ViewType basis2Values = basis2->allocateOutputView(numPoints, op2); // (F2,P[,D]) - ViewType orientedValues1, transformedValues1; - ViewType orientedValues2, transformedValues2, transformedWeightedValues2; + ViewType orientedValues1, transformedValues1, ultimateValues1; + ViewType orientedValues2, transformedValues2, ultimateValues2, ultimateWeightedValues2; - 
INTREPID2_TEST_FOR_EXCEPTION(basis1Values.rank() != basis2Values.rank(), std::invalid_argument, "basis1 and basis2 must agree on their rank under the respective operators"); + int ultimateBasis1Rank, ultimateBasis2Rank; + if (basis1Values.rank() == 2) + { + // the un-transformed values have shape (F,P): scalar values + // if vector weights supplied, these will increase the rank + ultimateBasis1Rank = (vectorWeight1 == Teuchos::null) ? 3 : 4; // (C,F,P) or (C,F,P,D) + } + else if (basis1Values.rank() == 3) + { + // the un-transformed values have shape (F,P,D): vector values + // if vector weights supplied, these will decrease the rank (we interpret as a dot product) + ultimateBasis1Rank = (vectorWeight1 == Teuchos::null) ? 4 : 3; // (C,F,P,D) or (C,F,P) + } + if (basis2Values.rank() == 2) + { + // the un-transformed values have shape (F,P): scalar values + // if vector weights supplied, these will increase the rank + ultimateBasis2Rank = (vectorWeight2 == Teuchos::null) ? 3 : 4; // (C,F,P) or (C,F,P,D) + } + else if (basis2Values.rank() == 3) + { + // the un-transformed values have shape (F,P,D): vector values + // if vector weights supplied, these will decrease the rank (we interpret as a dot product) + ultimateBasis2Rank = (vectorWeight2 == Teuchos::null) ? 
4 : 3; // (C,F,P,D) or (C,F,P) + } - const bool scalarValued = (basis1Values.rank() == 2); // (F1,P): scalar-valued - if (scalarValued) + INTREPID2_TEST_FOR_EXCEPTION(ultimateBasis1Rank != ultimateBasis2Rank, std::invalid_argument, "basis1 and basis2 must agree on their rank under the respective operators"); + + if (basis1Values.rank() == 2) { orientedValues1 = ViewType("oriented values 1", worksetSize, numFields1, numPoints); - orientedValues2 = ViewType("oriented values 2", worksetSize, numFields2, numPoints); - transformedValues1 = ViewType("transformed values 1", worksetSize, numFields1, numPoints); + } + else + { + orientedValues1 = ViewType("oriented values 1", worksetSize, numFields1, numPoints, spaceDim); + transformedValues1 = ViewType("transformed values 1", worksetSize, numFields1, numPoints, spaceDim); + } + if (basis2Values.rank() == 2) + { + orientedValues2 = ViewType("oriented values 2", worksetSize, numFields2, numPoints); transformedValues2 = ViewType("transformed values 2", worksetSize, numFields2, numPoints); + } + else + { + orientedValues2 = ViewType("oriented values 2", worksetSize, numFields2, numPoints, spaceDim); + transformedValues2 = ViewType("transformed values 2", worksetSize, numFields2, numPoints, spaceDim); + } + + const bool scalarValued = (ultimateBasis1Rank == 3); // (C,F1,P): scalar-valued + if (scalarValued) + { + ultimateValues1 = ViewType("ultimate values 1", worksetSize, numFields1, numPoints); + ultimateValues2 = ViewType("ultimate values 2", worksetSize, numFields2, numPoints); - transformedWeightedValues2 = ViewType("transformed weighted values 2", worksetSize, numFields2, numPoints); + ultimateWeightedValues2 = ViewType("ultimate weighted values 2", worksetSize, numFields2, numPoints); } else // (F1, P, D) { - const int finalDim = basis1Values.extent_int(2); - orientedValues1 = ViewType("oriented values 1", worksetSize, numFields1, numPoints, finalDim); - orientedValues2 = ViewType("oriented values 2", worksetSize, 
numFields2, numPoints, finalDim); - - transformedValues1 = ViewType("transformed values 1", worksetSize, numFields1, numPoints, finalDim); - transformedValues2 = ViewType("transformed values 2", worksetSize, numFields2, numPoints, finalDim); + ultimateValues1 = ViewType("ultimate values 1", worksetSize, numFields1, numPoints, spaceDim); + ultimateValues2 = ViewType("ultimate values 2", worksetSize, numFields2, numPoints, spaceDim); - transformedWeightedValues2 = ViewType("transformed weighted values 2", worksetSize, numFields2, numPoints, finalDim); + ultimateWeightedValues2 = ViewType("ultimate weighted values 2", worksetSize, numFields2, numPoints, spaceDim); } basis1->getValues(basis1Values, cubaturePoints, op1 ); @@ -218,6 +258,10 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell ViewType jacobianDeterminant("jacobian determinant", worksetSize, numPoints); ViewType jacobian("jacobian", worksetSize, numPoints, spaceDim, spaceDim); ViewType jacobianInverse("jacobian inverse", worksetSize, numPoints, spaceDim, spaceDim); + + // Views used for vector-weighted case: + ViewType scalarTransformedValues1 ("scalar transformed values 1", worksetSize, numFields1, numPoints); + ViewType scalarTransformedWeightedValues2("scalar transformed weighted values 2", worksetSize, numFields2, numPoints); initialSetupTimer->stop(); @@ -243,23 +287,45 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell Kokkos::resize(jacobianInverse, numCellsInWorkset, numPoints, spaceDim, spaceDim); Kokkos::resize(jacobianDeterminant, numCellsInWorkset, numPoints); Kokkos::resize(cellMeasures, numCellsInWorkset, numPoints); + Kokkos::resize(jacobianDeterminant, numCellsInWorkset, numPoints); - if (scalarValued) + Kokkos::resize(scalarTransformedValues1, numCellsInWorkset, numFields1, numPoints); + Kokkos::resize(scalarTransformedWeightedValues2, numCellsInWorkset, numFields2, numPoints); + + if (basis1Values.rank() == 2) + { + Kokkos::resize(orientedValues1, 
numCellsInWorkset, numFields1, numPoints); + Kokkos::resize(transformedValues1, numCellsInWorkset, numFields1, numPoints); + } + else + { + Kokkos::resize(orientedValues1, numCellsInWorkset, numFields1, numPoints, spaceDim); + Kokkos::resize(transformedValues1, numCellsInWorkset, numFields1, numPoints, spaceDim); + } + if (basis2Values.rank() == 2) { - Kokkos::resize(orientedValues1, numCellsInWorkset, numFields1, numPoints); - Kokkos::resize(orientedValues2, numCellsInWorkset, numFields2, numPoints); - Kokkos::resize(transformedValues1, numCellsInWorkset, numFields1, numPoints); - Kokkos::resize(transformedValues2, numCellsInWorkset, numFields2, numPoints); - Kokkos::resize(transformedWeightedValues2, numCellsInWorkset, numFields2, numPoints); + Kokkos::resize(orientedValues2, numCellsInWorkset, numFields2, numPoints); + Kokkos::resize(transformedValues2, numCellsInWorkset, numFields2, numPoints); } else { - const int finalDim = basis1Values.extent_int(2); - Kokkos::resize(orientedValues1, numCellsInWorkset, numFields1, numPoints, finalDim); - Kokkos::resize(orientedValues2, numCellsInWorkset, numFields2, numPoints, finalDim); - Kokkos::resize(transformedValues1, numCellsInWorkset, numFields1, numPoints, finalDim); - Kokkos::resize(transformedValues2, numCellsInWorkset, numFields2, numPoints, finalDim); - Kokkos::resize(transformedWeightedValues2, numCellsInWorkset, numFields2, numPoints, finalDim); + Kokkos::resize(orientedValues2, numCellsInWorkset, numFields2, numPoints, spaceDim); + Kokkos::resize(transformedValues2, numCellsInWorkset, numFields2, numPoints, spaceDim); + } + + if (scalarValued) + { + Kokkos::resize(ultimateValues1, numCellsInWorkset, numFields1, numPoints); + Kokkos::resize(ultimateValues2, numCellsInWorkset, numFields2, numPoints); + + Kokkos::resize(ultimateWeightedValues2, numCellsInWorkset, numFields2, numPoints); + } + else // (F1, P, D) + { + ultimateValues1 = ViewType("ultimate values 1", worksetSize, numFields1, numPoints, spaceDim); + 
ultimateValues2 = ViewType("ultimate values 2", worksetSize, numFields2, numPoints, spaceDim); + + ultimateWeightedValues2 = ViewType("ultimate weighted values 2", worksetSize, numFields2, numPoints, spaceDim); } } jacobianAndCellMeasureTimer->start(); @@ -271,20 +337,94 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell ExecutionSpace().fence(); jacobianAndCellMeasureTimer->stop(); - // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. - fstIntegrateCall->start(); OrientationTools::modifyBasisByOrientation(orientedValues1, basis1Values, orientationsWorkset, basis1.get()); OrientationTools::modifyBasisByOrientation(orientedValues2, basis2Values, orientationsWorkset, basis2.get()); + + // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. + fstIntegrateCall->start(); transform(transformedValues1, orientedValues1, fs1, op1, jacobian, jacobianDeterminant, jacobianInverse); transform(transformedValues2, orientedValues2, fs2, op2, jacobian, jacobianDeterminant, jacobianInverse); - - transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields1+numFields2) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. - FunctionSpaceTools::multiplyMeasure(transformedWeightedValues2, cellMeasures, transformedValues2); - transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields1+numFields2) * double(numPoints) * double(spaceDim); // multiply each entry of transformedGradValues: one flop for each. 
auto cellStiffnessSubview = Kokkos::subview(cellStiffness, cellRange, Kokkos::ALL(), Kokkos::ALL()); - FunctionSpaceTools::integrate(cellStiffnessSubview, transformedValues1, transformedWeightedValues2); + if (vectorWeight1 != Teuchos::null) + { + auto uWeight = *vectorWeight1; + + auto policy3 = Kokkos::MDRangePolicy>({0,0,0},{numCellsInWorkset,numFields1,numPoints}); + if (transformedValues1.rank() == 4) + { + Kokkos::parallel_for("compute ultimateValues1", policy3, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &fieldOrdinal, const int &pointOrdinal) + { + Scalar u_result = 0; + for (int d=0; d>({0,0,0},{numCellsInWorkset,numFields2,numPoints}); + if (transformedValues2.rank() == 4) + { + Kokkos::parallel_for("compute ultimateValues2", policy3, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &fieldOrdinal, const int &pointOrdinal) + { + Scalar v_result = 0; + for (int d=0; dstop(); @@ -297,4 +437,18 @@ Intrepid2::ScalarView performStandardAssembly(Intrepid2::Cell return cellStiffness; } +//! General assembly for two arbitrary bases and ops that uses the classic, generic Intrepid2 paths. 
+template +Intrepid2::ScalarView performStandardAssembly(Intrepid2::CellGeometry &geometry, int worksetSize, + const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, + const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) +{ + Teuchos::RCP< Kokkos::Array > nullVectorWeight = Teuchos::null; + + return performStandardAssembly(geometry, worksetSize, + polyOrder1, fs1, op1, nullVectorWeight, + polyOrder2, fs2, op2, nullVectorWeight, + transformIntegrateFlopCount, jacobianCellMeasureFlopCount); +} #endif /* StandardAssembly_hpp */ diff --git a/packages/intrepid2/assembly-examples/StructuredAssembly.hpp b/packages/intrepid2/assembly-examples/StructuredAssembly.hpp index 24c87de7e90e..98a31da9c041 100644 --- a/packages/intrepid2/assembly-examples/StructuredAssembly.hpp +++ b/packages/intrepid2/assembly-examples/StructuredAssembly.hpp @@ -102,10 +102,10 @@ namespace { } //! General assembly for two arbitrary bases and ops that takes advantage of the new structured integration support, including support for sum factorization. 
-template +template // spaceDim and spaceDim2 should agree in value (differ in type) Intrepid2::ScalarView performStructuredAssembly(Intrepid2::CellGeometry &geometry, const int &worksetSize, - const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, - const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, + const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, Teuchos::RCP< Kokkos::Array > vectorWeight1, + const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, Teuchos::RCP< Kokkos::Array > vectorWeight2, double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) { using namespace Intrepid2; @@ -151,7 +151,7 @@ Intrepid2::ScalarView performStructuredAssembly(Intrepid2::Ce BasisValues basis2Values = basis2->allocateBasisValues(tensorCubaturePoints, op2); basis2->getValues(basis2Values, tensorCubaturePoints, op2); - + int cellOffset = 0; auto jacobianAndCellMeasureTimer = Teuchos::TimeMonitor::getNewTimer("Jacobians"); @@ -169,18 +169,19 @@ Intrepid2::ScalarView performStructuredAssembly(Intrepid2::Ce auto transformedBasis2ValuesTemp = transform(basis2Values, fs2, op2, jacobian, jacobianDet, jacobianInv, jacobianDetInv, jacobianDividedByJacobianDet); auto integralData = IntegrationTools::allocateIntegralData(transformedBasis1ValuesTemp, cellMeasures, transformedBasis2ValuesTemp); - const int numPoints = jacobian.getDataExtent(1); // data extent will be 1 for affine, numPoints for other cases + const int numJacobianDataPoints = jacobian.getDataExtent(1); // data extent will be 1 for affine, numPoints for other cases + const int numPoints = jacobian.extent_int(1); // number of logical points // TODO: make the below determination accurate for diagonal/block-diagonal cases… (right now, will overcount) - const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numPoints, numVertices); - const 
double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numPoints); - const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numPoints); + const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numJacobianDataPoints, numVertices); + const double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numJacobianDataPoints); + const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numJacobianDataPoints); transformIntegrateFlopCount = 0; jacobianCellMeasureFlopCount = numCells * flopsPerJacobianPerCell; // jacobian itself jacobianCellMeasureFlopCount += numCells * flopsPerJacobianInvPerCell; // inverse jacobianCellMeasureFlopCount += numCells * flopsPerJacobianDetPerCell; // determinant - jacobianCellMeasureFlopCount += numCells * numPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) + jacobianCellMeasureFlopCount += numCells * numJacobianDataPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) auto refData = geometry.getJacobianRefData(tensorCubaturePoints); @@ -217,6 +218,49 @@ Intrepid2::ScalarView performStructuredAssembly(Intrepid2::Ce auto transformedBasis1Values = transform(basis1Values, fs1, op1, jacobian, jacobianDet, jacobianInv, jacobianDetInv, jacobianDividedByJacobianDet); auto transformedBasis2Values = transform(basis2Values, fs2, op2, jacobian, jacobianDet, jacobianInv, jacobianDetInv, jacobianDividedByJacobianDet); + if (vectorWeight1 != Teuchos::null) + { + ScalarView auView("a_u", spaceDim); + auto auViewHost = Kokkos::create_mirror(auView); + for (int d=0; d extents {numCellsInWorkset,numPoints,spaceDim}; + Kokkos::Array variationTypes {CONSTANT, CONSTANT, GENERAL}; + + Data au_data(auView, extents, variationTypes); + auto uTransform = Data::allocateMatVecResult(transformedBasis1Values.transform(), au_data, true); + uTransform.storeMatVec(transformedBasis1Values.transform(), au_data, true); // true: transpose basis transform 
when multiplying + transformedBasis1Values = Intrepid2::TransformedBasisValues(uTransform, basis1Values); + + // TODO: modify transformIntegrateFlopCount to include an estimate for above mat-vecs (but note that these will not be a dominant cost, especially at high order). + } + + if (vectorWeight2 != Teuchos::null) + { + ScalarView avView("a_v", spaceDim); + auto avViewHost = Kokkos::create_mirror(avView); + + for (int d=0; d extents {numCellsInWorkset,numPoints,spaceDim}; + Kokkos::Array variationTypes {CONSTANT, CONSTANT, GENERAL}; + + Data av_data(avView, extents, variationTypes); + auto vTransform = Data::allocateMatVecResult(transformedBasis2Values.transform(), av_data, true); + vTransform.storeMatVec(transformedBasis2Values.transform(), av_data, true); // true: transpose basis transform when multiplying + transformedBasis2Values = Intrepid2::TransformedBasisValues(vTransform, basis2Values); + + // TODO: modify transformIntegrateFlopCount to include an estimate for above mat-vecs (but note that these will not be a dominant cost, especially at high order). + } + geometry.computeCellMeasure(cellMeasures, jacobianDet, tensorCubatureWeights); ExecutionSpace().fence(); jacobianAndCellMeasureTimer->stop(); @@ -243,6 +287,22 @@ Intrepid2::ScalarView performStructuredAssembly(Intrepid2::Ce cellOffset += worksetSize; } return cellStiffness; + +} + +//! General assembly for two arbitrary bases and ops that takes advantage of the new structured integration support, including support for sum factorization. 
+template +Intrepid2::ScalarView performStructuredAssembly(Intrepid2::CellGeometry &geometry, const int &worksetSize, + const int &polyOrder1, const Intrepid2::EFunctionSpace &fs1, const Intrepid2::EOperator &op1, + const int &polyOrder2, const Intrepid2::EFunctionSpace &fs2, const Intrepid2::EOperator &op2, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) +{ + Teuchos::RCP< Kokkos::Array > nullVectorWeight = Teuchos::null; + + return performStructuredAssembly(geometry, worksetSize, + polyOrder1, fs1, op1, nullVectorWeight, + polyOrder2, fs2, op2, nullVectorWeight, + transformIntegrateFlopCount, jacobianCellMeasureFlopCount); } #endif /* StructuredAssembly_h */ diff --git a/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStandardAssembly.hpp b/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStandardAssembly.hpp new file mode 100644 index 000000000000..dc540e7e65a3 --- /dev/null +++ b/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStandardAssembly.hpp @@ -0,0 +1,205 @@ +// +// VectorWeightedGRADGRADStandardAssembly.hpp +// Trilinos +// +// Created by Roberts, Nathan V on 5/13/24. +// + +#ifndef Intrepid2_VectorWeightedGRADGRADStandardAssembly_hpp +#define Intrepid2_VectorWeightedGRADGRADStandardAssembly_hpp + +#include "JacobianFlopEstimate.hpp" +#include "Intrepid2_OrientationTools.hpp" + +/** \file VectorWeightedGRADGRADStandardAssembly.hpp + \brief Locally assembles a vector-weighted Poisson matrix -- an array of shape (C,F,F), with formulation (a dot grad e_i, b dot grad e_j), using standard Intrepid2 methods; these do not algorithmically exploit geometric structure. + */ + +//! Version that uses the classic, generic Intrepid2 paths. 
+template +Intrepid2::ScalarView performStandardQuadratureVectorWeightedGRADGRAD(Intrepid2::CellGeometry &geometry, + const int &polyOrder, int worksetSize, + Teuchos::RCP> vectorWeight1, + Teuchos::RCP> vectorWeight2, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) +{ + INTREPID2_TEST_FOR_EXCEPTION(vectorWeight1 == Teuchos::null, std::invalid_argument, "vectorWeight1 cannot be null"); + INTREPID2_TEST_FOR_EXCEPTION(vectorWeight2 == Teuchos::null, std::invalid_argument, "vectorWeight2 cannot be null"); + + using ExecutionSpace = typename DeviceType::execution_space; + int numVertices = 1; + for (int d=0; dstart(); + + using CellTools = Intrepid2::CellTools; + using FunctionSpaceTools = Intrepid2::FunctionSpaceTools; + + using namespace Intrepid2; + + using namespace std; + // dimensions of the returned view are (C,F,F) + auto fs = FUNCTION_SPACE_HGRAD; + + Intrepid2::ScalarView orientations("orientations", geometry.numCells() ); + geometry.orientations(orientations, 0, -1); + + shards::CellTopology cellTopo = geometry.cellTopology(); + + auto basis = getBasis< BasisFamily >(cellTopo, fs, polyOrder); + + int numFields = basis->getCardinality(); + int numCells = geometry.numCells(); + + if (worksetSize > numCells) worksetSize = numCells; + + // local stiffness matrices: + ScalarView cellStiffness("cell stiffness matrices",numCells,numFields,numFields); + + auto cubature = DefaultCubatureFactory::create(cellTopo,polyOrder*2); + int numPoints = cubature->getNumPoints(); + ScalarView cubaturePoints("cubature points",numPoints,spaceDim); + ScalarView cubatureWeights("cubature weights", numPoints); + + cubature->getCubature(cubaturePoints, cubatureWeights); + + const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numPoints, numVertices); + const double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numPoints); + const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numPoints); + + // Allocate some 
intermediate containers + ScalarView basisValues ("basis values", numFields, numPoints ); + ScalarView basisGradValues("basis grad values", numFields, numPoints, spaceDim); + + ScalarView unorientedTransformedGradValues("unoriented transformed grad values", worksetSize, numFields, numPoints, spaceDim); + ScalarView transformedGradValues("transformed grad values", worksetSize, numFields, numPoints, spaceDim); + ScalarView transformedWeightedGradValues("transformed weighted grad values", worksetSize, numFields, numPoints, spaceDim); + ScalarView vectorWeightedTransformedGradValues("vector-weighted transformed grad values", worksetSize, numFields, numPoints); + ScalarView vectorWeightedTransformedWeightedGradValues("vector-weighted transformed weighted grad values", worksetSize, numFields, numPoints); + + basis->getValues(basisValues, cubaturePoints, OPERATOR_VALUE ); + basis->getValues(basisGradValues, cubaturePoints, OPERATOR_GRAD ); + + const int numNodesPerCell = geometry.numNodesPerCell(); + ScalarView expandedCellNodes("expanded cell nodes",numCells,numNodesPerCell,spaceDim); + Kokkos::parallel_for(Kokkos::RangePolicy(0,numCells), + KOKKOS_LAMBDA (const int &cellOrdinal) { + for (int nodeOrdinal=0; nodeOrdinal cellMeasures("cell measures", worksetSize, numPoints); + ScalarView jacobianDeterminant("jacobian determinant", worksetSize, numPoints); + ScalarView jacobian("jacobian", worksetSize, numPoints, spaceDim, spaceDim); + ScalarView jacobianInverse("jacobian inverse", worksetSize, numPoints, spaceDim, spaceDim); + + auto auView = getView("a_u", spaceDim); + auto auViewHost = Kokkos::create_mirror(auView); + + for (int d=0; d("a_v", spaceDim); + auto avViewHost = Kokkos::create_mirror(avView); + for (int d=0; dstop(); + + transformIntegrateFlopCount = 0; + jacobianCellMeasureFlopCount = numCells * flopsPerJacobianPerCell; // jacobian itself + jacobianCellMeasureFlopCount += numCells * flopsPerJacobianInvPerCell; // inverse + jacobianCellMeasureFlopCount += 
numCells * flopsPerJacobianDetPerCell; // determinant + jacobianCellMeasureFlopCount += numCells * numPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) + + int cellOffset = 0; + while (cellOffset < numCells) + { + int startCell = cellOffset; + int numCellsInWorkset = (cellOffset + worksetSize - 1 < numCells) ? worksetSize : numCells - startCell; + + std::pair cellRange = {startCell, startCell+numCellsInWorkset}; + auto cellWorkset = Kokkos::subview(expandedCellNodes, cellRange, Kokkos::ALL(), Kokkos::ALL()); + auto orientationsWorkset = Kokkos::subview(orientations, cellRange); + + if (numCellsInWorkset != worksetSize) + { + Kokkos::resize(jacobian, numCellsInWorkset, numPoints, spaceDim, spaceDim); + Kokkos::resize(jacobianInverse, numCellsInWorkset, numPoints, spaceDim, spaceDim); + Kokkos::resize(jacobianDeterminant, numCellsInWorkset, numPoints); + Kokkos::resize(cellMeasures, numCellsInWorkset, numPoints); + Kokkos::resize(unorientedTransformedGradValues, numCellsInWorkset, numFields, numPoints, spaceDim); + Kokkos::resize(transformedGradValues, numCellsInWorkset, numFields, numPoints, spaceDim); + Kokkos::resize(transformedWeightedGradValues, numCellsInWorkset, numFields, numPoints, spaceDim); + } + jacobianAndCellMeasureTimer->start(); + CellTools::setJacobian(jacobian, cubaturePoints, cellWorkset, cellTopo); // accounted for outside loop, as numCells * flopsPerJacobianPerCell. + CellTools::setJacobianInv(jacobianInverse, jacobian); + CellTools::setJacobianDet(jacobianDeterminant, jacobian); + + FunctionSpaceTools::computeCellMeasure(cellMeasures, jacobianDeterminant, cubatureWeights); + ExecutionSpace().fence(); + jacobianAndCellMeasureTimer->stop(); + + // because structured integration performs transformations within integrate(), to get a fairer comparison here we include the transformation calls. 
+ fstIntegrateCall->start(); + FunctionSpaceTools::HGRADtransformGRAD(unorientedTransformedGradValues, jacobianInverse, basisGradValues); + // we want to exclude orientation application in the core integration timing -- this time gets reported as "Other" + fstIntegrateCall->stop(); + OrientationTools::modifyBasisByOrientation(transformedGradValues, unorientedTransformedGradValues, + orientationsWorkset, basis.get()); + fstIntegrateCall->start(); + + transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim) * (spaceDim - 1) * 2.0; // 2: one multiply, one add per (P,D) entry in the contraction. + FunctionSpaceTools::multiplyMeasure(transformedWeightedGradValues, cellMeasures, transformedGradValues); + transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numPoints) * double(spaceDim); // multiply each entry of transformedGradValues: one flop for each. + + auto policy3 = Kokkos::MDRangePolicy>({0,0,0},{numCellsInWorkset,numFields,numPoints}); + Kokkos::parallel_for("compute expanded_{u,v}TransformedGradValues", policy3, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &fieldOrdinal, const int &pointOrdinal) + { + Scalar u_result = 0; + Scalar v_result_weighted = 0; + for (int d=0; dstop(); + + transformIntegrateFlopCount += double(numCellsInWorkset) * double(numFields) * double(numFields) * double(numPoints * 2); // 2: one multiply, one add per P entry in the contraction. 
+ + cellOffset += worksetSize; + } +// std::cout << "standard integration, approximateFlopCount: " << approximateFlopCount << std::endl; + return cellStiffness; +} + +#endif /* VectorWeightedGRADGRADStandardAssembly_h */ diff --git a/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStructuredAssembly.hpp b/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStructuredAssembly.hpp new file mode 100644 index 000000000000..a1d640607720 --- /dev/null +++ b/packages/intrepid2/assembly-examples/VectorWeightedGRADGRADStructuredAssembly.hpp @@ -0,0 +1,187 @@ +// +// VectorWeightedGRADGRADStructuredAssembly.hpp +// Trilinos +// +// Created by Roberts, Nathan V on 5/13/24. +// + +#ifndef VectorWeightedGRADGRADStructuredAssembly_h +#define VectorWeightedGRADGRADStructuredAssembly_h + +#include "JacobianFlopEstimate.hpp" +#include "Intrepid2_OrientationTools.hpp" + +/** \file VectorWeightedGRADGRADStructuredAssembly.hpp + \brief Locally assembles a vector-weighted Poisson matrix -- an array of shape (C,F,F), with formulation (a dot grad e_i, b dot grad e_j), using "structured" Intrepid2 methods; these algorithmically exploit geometric structure as expressed in the provided CellGeometry. + */ + +//! Version that takes advantage of new structured integration support, including sum factorization. 
+template +Intrepid2::ScalarView performStructuredQuadratureVectorWeightedGRADGRAD(Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, + Teuchos::RCP> vectorWeight1, + Teuchos::RCP> vectorWeight2, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) +{ + using namespace Intrepid2; + + using ExecutionSpace = typename DeviceType::execution_space; + + int numVertices = 1; + for (int d=0; dstart(); + using namespace std; + using FunctionSpaceTools = FunctionSpaceTools; + using IntegrationTools = IntegrationTools; + // dimensions of the returned view are (C,F,F) + auto fs = FUNCTION_SPACE_HGRAD; + + Intrepid2::ScalarView orientations("orientations", geometry.numCells() ); + geometry.orientations(orientations, 0, -1); + + shards::CellTopology cellTopo = geometry.cellTopology(); + + auto basis = getBasis< BasisFamily >(cellTopo, fs, polyOrder); + + int numFields = basis->getCardinality(); + int numCells = geometry.numCells(); + + // local stiffness matrix: + ScalarView cellStiffness("cell stiffness matrices",numCells,numFields,numFields); + ScalarView worksetCellStiffness("cell stiffness workset matrices",worksetSize,numFields,numFields); + + auto cubature = DefaultCubatureFactory::create(cellTopo,polyOrder*2); + auto tensorCubatureWeights = cubature->allocateCubatureWeights(); + TensorPoints tensorCubaturePoints = cubature->allocateCubaturePoints(); + + cubature->getCubature(tensorCubaturePoints, tensorCubatureWeights); + + EOperator op = OPERATOR_GRAD; + BasisValues gradientValues = basis->allocateBasisValues(tensorCubaturePoints, op); + basis->getValues(gradientValues, tensorCubaturePoints, op); + + // goal here is to do a weighted Poisson; i.e. 
(f grad u, grad v) on each cell + + int cellOffset = 0; + + auto jacobianAndCellMeasureTimer = Teuchos::TimeMonitor::getNewTimer("Jacobians"); + auto fstIntegrateCall = Teuchos::TimeMonitor::getNewTimer("transform + integrate()"); + + Data jacobian = geometry.allocateJacobianData(tensorCubaturePoints, 0, worksetSize); + Data jacobianDet = CellTools::allocateJacobianDet(jacobian); + Data jacobianInv = CellTools::allocateJacobianInv(jacobian); + TensorData cellMeasures = geometry.allocateCellMeasure(jacobianDet, tensorCubatureWeights); + + // lazily-evaluated transformed gradient values (temporary to allow integralData allocation) + auto transformedGradientValuesTemp = FunctionSpaceTools::getHGRADtransformGRAD(jacobianInv, gradientValues); + auto integralData = IntegrationTools::allocateIntegralData(transformedGradientValuesTemp, cellMeasures, transformedGradientValuesTemp); + + const int numJacobianDataPoints = jacobian.getDataExtent(1); // data extent will be 1 for affine, numPoints for other cases + const int numPoints = jacobian.extent_int(1); // logical point count + + // TODO: make the below determination accurate for diagonal/block-diagonal cases… (right now, will overcount) + const double flopsPerJacobianPerCell = flopsPerJacobian(spaceDim, numJacobianDataPoints, numVertices); + const double flopsPerJacobianDetPerCell = flopsPerJacobianDet(spaceDim, numJacobianDataPoints); + const double flopsPerJacobianInvPerCell = flopsPerJacobianInverse(spaceDim, numJacobianDataPoints); + + transformIntegrateFlopCount = 0; + jacobianCellMeasureFlopCount = numCells * flopsPerJacobianPerCell; // jacobian itself + jacobianCellMeasureFlopCount += numCells * flopsPerJacobianInvPerCell; // inverse + jacobianCellMeasureFlopCount += numCells * flopsPerJacobianDetPerCell; // determinant + jacobianCellMeasureFlopCount += numCells * numJacobianDataPoints; // cell measure: (C,P) gets weighted with cubature weights of shape (P) + + auto refData = 
geometry.getJacobianRefData(tensorCubaturePoints); + + ScalarView auView("a_u", spaceDim); + auto auViewHost = Kokkos::create_mirror(auView); + + for (int d=0; d avView("a_v", spaceDim); + auto avViewHost = Kokkos::create_mirror(avView); + + for (int d=0; d au_data(auView, Kokkos::Array{worksetSize,numPoints,spaceDim}, Kokkos::Array{CONSTANT,CONSTANT,GENERAL}); + Data av_data(avView, Kokkos::Array{worksetSize,numPoints,spaceDim}, Kokkos::Array{CONSTANT,CONSTANT,GENERAL}); + + auto uTransform = Data::allocateMatVecResult(jacobianInv, au_data, true); + auto vTransform = Data::allocateMatVecResult(jacobianInv, av_data, true); + + initialSetupTimer->stop(); + while (cellOffset < numCells) + { + int startCell = cellOffset; + int numCellsInWorkset = (cellOffset + worksetSize - 1 < numCells) ? worksetSize : numCells - startCell; + int endCell = numCellsInWorkset + startCell; + + jacobianAndCellMeasureTimer->start(); + if (numCellsInWorkset != worksetSize) + { + const int CELL_DIM = 0; // first dimension corresponds to cell + jacobian.setExtent (CELL_DIM, numCellsInWorkset); + jacobianDet.setExtent (CELL_DIM, numCellsInWorkset); + jacobianInv.setExtent (CELL_DIM, numCellsInWorkset); + integralData.setExtent(CELL_DIM, numCellsInWorkset); + au_data.setExtent (CELL_DIM, numCellsInWorkset); + av_data.setExtent (CELL_DIM, numCellsInWorkset); + uTransform.setExtent (CELL_DIM, numCellsInWorkset); + vTransform.setExtent (CELL_DIM, numCellsInWorkset); + + Kokkos::resize(worksetCellStiffness, numCellsInWorkset, numFields, numFields); + + // cellMeasures is a TensorData object with separateFirstComponent_ = true; the below sets the cell dimension… + cellMeasures.setFirstComponentExtentInDimension0(numCellsInWorkset); + } + + geometry.setJacobian(jacobian, tensorCubaturePoints, refData, startCell, endCell); + CellTools::setJacobianDet(jacobianDet, jacobian); + CellTools::setJacobianInv(jacobianInv, jacobian); + + // lazily-evaluated transformed gradient values: + 
geometry.computeCellMeasure(cellMeasures, jacobianDet, tensorCubatureWeights); + ExecutionSpace().fence(); + jacobianAndCellMeasureTimer->stop(); + + uTransform.storeMatVec(jacobianInv, au_data, true); // true: transpose jacobianInv when multiplying + vTransform.storeMatVec(jacobianInv, av_data, true); // true: transpose jacobianInv when multiplying + + Intrepid2::TransformedBasisValues uTransformedGradientValues(uTransform, gradientValues); + Intrepid2::TransformedBasisValues vTransformedGradientValues(vTransform, gradientValues); + + bool sumInto = false; + double approximateFlopCountIntegrateWorkset = 0; + fstIntegrateCall->start(); + IntegrationTools::integrate(integralData, uTransformedGradientValues, cellMeasures, vTransformedGradientValues, sumInto, &approximateFlopCountIntegrateWorkset); + ExecutionSpace().fence(); + fstIntegrateCall->stop(); + + // modify integrals by orientations + std::pair cellRange = {startCell, endCell}; + auto orientationsWorkset = Kokkos::subview(orientations, cellRange); + OrientationTools::modifyMatrixByOrientation(worksetCellStiffness, integralData.getUnderlyingView(), + orientationsWorkset, basis.get(), basis.get()); + + // copy into cellStiffness container. 
+ auto cellStiffnessSubview = Kokkos::subview(cellStiffness, cellRange, Kokkos::ALL(), Kokkos::ALL()); + Kokkos::deep_copy(cellStiffnessSubview, worksetCellStiffness); + + transformIntegrateFlopCount += approximateFlopCountIntegrateWorkset; + + cellOffset += worksetSize; + } + return cellStiffness; +} + +#endif /* VectorWeightedGRADGRADStructuredAssembly_h */ diff --git a/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp b/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp index 9efe77a264f9..a9eb6cab7145 100644 --- a/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp +++ b/packages/intrepid2/src/Cell/Intrepid2_CellTools.hpp @@ -352,11 +352,11 @@ namespace Intrepid2 { /** \brief Computes reciprocals of determinants corresponding to the Jacobians in the Data container provided - \param jacobianDet [out] - data with shape (C,P), as returned by CellTools::allocateJacobianDet() - \param jacobian [in] - data with shape (C,P,D,D), as returned by CellGeometry::allocateJacobianData() + \param jacobianDetInv [out] - data with shape (C,P), as returned by CellTools::allocateJacobianDet() + \param jacobian [in] - data with shape (C,P,D,D), as returned by CellGeometry::allocateJacobianData() */ template - static void setJacobianDetInv( Data & jacobianDet, + static void setJacobianDetInv( Data & jacobianDetInv, const Data & jacobian); /** \brief Computes determinants corresponding to the Jacobians in the Data container provided diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_BasisValues.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_BasisValues.hpp index 9750acf87e4d..588957c915a5 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_BasisValues.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_BasisValues.hpp @@ -31,18 +31,18 @@ namespace Intrepid2 { - template + template class BasisValues { - using TensorDataType = TensorData; - using VectorDataType = VectorData; + using TensorDataType = TensorData; + using 
VectorDataType = VectorData; Kokkos::Array tensorDataFamilies_; VectorDataType vectorData_; int numTensorDataFamilies_ = -1; - Kokkos::View ordinalFilter_; + Kokkos::View ordinalFilter_; public: //! Constructor for scalar-valued BasisValues with a single family of values. BasisValues(TensorDataType tensorData) @@ -76,8 +76,8 @@ namespace Intrepid2 //! copy-like constructor for differing execution spaces. This does a deep copy of underlying views. - template::value>::type> - BasisValues(const BasisValues &basisValues) + template::value>::type> + BasisValues(const BasisValues &basisValues) : vectorData_(basisValues.vectorData()), numTensorDataFamilies_(basisValues.numTensorDataFamilies()) @@ -85,16 +85,16 @@ namespace Intrepid2 auto otherFamilies = basisValues.tensorDataFamilies(); for (int family=0; family(otherFamilies[family]); + tensorDataFamilies_[family] = TensorData(otherFamilies[family]); } auto otherOrdinalFilter = basisValues.ordinalFilter(); - ordinalFilter_ = Kokkos::View("BasisValues::ordinalFilter_",otherOrdinalFilter.extent(0)); + ordinalFilter_ = Kokkos::View("BasisValues::ordinalFilter_",otherOrdinalFilter.extent(0)); Kokkos::deep_copy(ordinalFilter_, otherOrdinalFilter); } //! field start and length must align with families in vectorData_ or tensorDataFamilies_ (whichever is valid). 
- BasisValues basisValuesForFields(const int &fieldStartOrdinal, const int &numFields) + BasisValues basisValuesForFields(const int &fieldStartOrdinal, const int &numFields) { int familyStartOrdinal = -1, familyEndOrdinal = -1; const int familyCount = this->numFamilies(); @@ -118,12 +118,12 @@ namespace Intrepid2 { tensorDataFamilies[i-familyStartOrdinal] = tensorDataFamilies_[i]; } - return BasisValues(tensorDataFamilies); + return BasisValues(tensorDataFamilies); } else { const int componentCount = vectorData_.numComponents(); - std::vector< std::vector > > vectorComponents(numFamiliesInFieldSpan, std::vector >(componentCount)); + std::vector< std::vector > > vectorComponents(numFamiliesInFieldSpan, std::vector >(componentCount)); for (int i=familyStartOrdinal; i<=familyEndOrdinal; i++) { for (int j=0; j(vectorComponents); + return BasisValues(vectorComponents); } } @@ -327,16 +327,22 @@ namespace Intrepid2 } } - void setOrdinalFilter(Kokkos::View ordinalFilter) + void setOrdinalFilter(Kokkos::View ordinalFilter) { ordinalFilter_ = ordinalFilter; } - Kokkos::View ordinalFilter() const + Kokkos::View ordinalFilter() const { return ordinalFilter_; } }; -} + + template + KOKKOS_INLINE_FUNCTION unsigned rank(const BasisValues &basisValues) + { + return basisValues.rank(); + } +} // namespace Intrepid2 #endif /* Intrepid2_BasisValues_h */ diff --git a/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp b/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp index 674630be879b..a8e57b15d5ef 100644 --- a/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp +++ b/packages/intrepid2/src/Discretization/Integration/Intrepid2_IntegrationToolsDef.hpp @@ -15,6 +15,7 @@ #ifndef __INTREPID2_INTEGRATIONTOOLS_DEF_HPP__ #define __INTREPID2_INTEGRATIONTOOLS_DEF_HPP__ +#include "Intrepid2_DataTools.hpp" #include "Intrepid2_FunctorIterator.hpp" #include "Intrepid2_TensorArgumentIterator.hpp" 
@@ -1210,7 +1211,6 @@ namespace Intrepid2 { const int GyEntryCount = pointBounds_z; // for each thread: store one Gy value per z coordinate Kokkos::View GxIntegrals; // for caching Gx values: we integrate out the first component dimension for each coordinate in the remaining dimensios Kokkos::View GyIntegrals; // for caching Gy values (each thread gets a stack, of the same height as tensorComponents - 1) - Kokkos::View GzIntegral; // for one Gz value that we sum into before summing into the destination matrix Kokkos::View pointWeights; // indexed by (expanded) point; stores M_ab * cell measure; shared by team Kokkos::View leftFields_x, rightFields_x; @@ -1219,7 +1219,6 @@ namespace Intrepid2 { if (fad_size_output_ > 0) { GxIntegrals = Kokkos::View(teamMember.team_shmem(), pointsInNonzeroComponentDimensions, fad_size_output_); GyIntegrals = Kokkos::View(teamMember.team_shmem(), GyEntryCount * numThreads, fad_size_output_); - GzIntegral = Kokkos::View(teamMember.team_shmem(), numThreads, fad_size_output_); pointWeights = Kokkos::View (teamMember.team_shmem(), composedTransform_.extent_int(1), fad_size_output_); leftFields_x = Kokkos::View(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x, fad_size_output_); @@ -1232,7 +1231,6 @@ namespace Intrepid2 { else { GxIntegrals = Kokkos::View(teamMember.team_shmem(), pointsInNonzeroComponentDimensions); GyIntegrals = Kokkos::View(teamMember.team_shmem(), GyEntryCount * numThreads); - GzIntegral = Kokkos::View(teamMember.team_shmem(), numThreads); pointWeights = Kokkos::View (teamMember.team_shmem(), composedTransform_.extent_int(1)); leftFields_x = Kokkos::View(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x); @@ -1376,43 +1374,67 @@ namespace Intrepid2 { const int i1 = i1j1 % leftFieldBounds_y; const int j1 = i1j1 / leftFieldBounds_y; - int Gy_index = GyEntryCount * threadNumber; // thread-relative index into GyIntegrals container; store one value per z coordinate + int Gy_index_offset = GyEntryCount * 
threadNumber; // thread-relative index into GyIntegrals container; store one value per z coordinate - int pointEnumerationIndex = 0; // incremented at bottom of lz loop below. for (int lz=0; lz(integralView, cellDataOrdinal, i, j) += Gz; + Kokkos::single (Kokkos::PerThread(teamMember), [&] () { + integralViewEntry(integralView, cellDataOrdinal, i, j) += Gz; + }); } } }); @@ -1766,7 +1790,6 @@ namespace Intrepid2 { { shmem_size += Kokkos::View::shmem_size(pointsInNonzeroComponentDimensions, fad_size_output_); // GxIntegrals: entries with x integrated away shmem_size += Kokkos::View::shmem_size(GyEntryCount * numThreads, fad_size_output_); // GyIntegrals: entries with x,y integrated away - shmem_size += Kokkos::View::shmem_size( 1 * numThreads, fad_size_output_); // GzIntegral: entry with x,y,z integrated away shmem_size += Kokkos::View::shmem_size (composedTransform_.extent_int(1), fad_size_output_); // pointWeights shmem_size += Kokkos::View::shmem_size( leftFieldBounds_[0], pointBounds_[0], fad_size_output_); // leftFields_x @@ -1780,7 +1803,6 @@ namespace Intrepid2 { { shmem_size += Kokkos::View::shmem_size(pointsInNonzeroComponentDimensions); // GxIntegrals: entries with x integrated away shmem_size += Kokkos::View::shmem_size(GyEntryCount * numThreads); // GyIntegrals: entries with x,y integrated away - shmem_size += Kokkos::View::shmem_size( 1 * numThreads); // GzIntegral: entry with x,y,z integrated away shmem_size += Kokkos::View::shmem_size (composedTransform_.extent_int(1)); // pointWeights shmem_size += Kokkos::View::shmem_size( leftFieldBounds_[0], pointBounds_[0]); // leftFields_x @@ -1940,16 +1962,14 @@ void IntegrationTools::integrate(Data integrals, // we require that the number of tensor components in the vectors are the same for each vector entry // this is not strictly necessary, but it makes implementation easier, and we don't at present anticipate other use cases int numTensorComponentsLeft = -1; - const bool isVectorValued = 
basisValuesLeft.vectorData().isValid(); - if (isVectorValued) + const bool leftIsVectorValued = basisValuesLeft.vectorData().isValid(); + + if (leftIsVectorValued) { - const bool rightIsVectorValued = basisValuesRight.vectorData().isValid(); - INTREPID2_TEST_FOR_EXCEPTION(!rightIsVectorValued, std::invalid_argument, "left and right must either both be vector-valued, or both scalar-valued"); const auto &refVectorLeft = basisValuesLeft.vectorData(); int numFamiliesLeft = refVectorLeft.numFamilies(); int numVectorComponentsLeft = refVectorLeft.numComponents(); Kokkos::Array maxFieldsForComponentLeft {0,0,0,0,0,0,0}; - Kokkos::Array maxFieldsForComponentRight {0,0,0,0,0,0,0}; for (int familyOrdinal=0; familyOrdinal::integrate(Data integrals, } } } - int numTensorComponentsRight = -1; + } + else + { + numTensorComponentsLeft = basisValuesLeft.basisValues().tensorData(0).numTensorComponents(); // family ordinal 0 + for (int familyOrdinal = 0; familyOrdinal < leftFamilyCount; familyOrdinal++) + { + INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.basisValues().tensorData(familyOrdinal).numTensorComponents() != numTensorComponentsLeft, std::invalid_argument, "All families must match in the number of tensor components"); + } + } + int numTensorComponentsRight = -1; + const bool rightIsVectorValued = basisValuesRight.vectorData().isValid(); + + if (rightIsVectorValued) + { const auto &refVectorRight = basisValuesRight.vectorData(); int numFamiliesRight = refVectorRight.numFamilies(); int numVectorComponentsRight = refVectorRight.numComponents(); + Kokkos::Array maxFieldsForComponentRight {0,0,0,0,0,0,0}; for (int familyOrdinal=0; familyOrdinal::integrate(Data integrals, } } } - INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsLeft != numVectorComponentsRight, std::invalid_argument, "Left and right vector entries must have the same number of tensorial components"); + INTREPID2_TEST_FOR_EXCEPTION(numTensorComponentsRight != numTensorComponentsLeft, std::invalid_argument, "Right 
families must match left in the number of tensor components"); } else { - numTensorComponentsLeft = basisValuesLeft.basisValues().tensorData(0).numTensorComponents(); // family ordinal 0 - for (int familyOrdinal = 0; familyOrdinal < leftFamilyCount; familyOrdinal++) - { - INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.basisValues().tensorData(familyOrdinal).numTensorComponents() != numTensorComponentsLeft, std::invalid_argument, "All families must match in the number of tensor components"); - } - - // check that right tensor component count also agrees + // check that right tensor component count agrees with left for (int familyOrdinal=0; familyOrdinal< rightFamilyCount; familyOrdinal++) { INTREPID2_TEST_FOR_EXCEPTION(basisValuesRight.basisValues().tensorData(familyOrdinal).numTensorComponents() != numTensorComponentsLeft, std::invalid_argument, "Right families must match left in the number of tensor components"); @@ -2042,11 +2070,11 @@ void IntegrationTools::integrate(Data integrals, int a_offset = 0; // left vector component offset int leftFieldOffset = basisValuesLeft.basisValues().familyFieldOrdinalOffset(leftFamilyOrdinal); - const int leftVectorComponentCount = isVectorValued ? basisValuesLeft.vectorData().numComponents() : 1; + const int leftVectorComponentCount = leftIsVectorValued ? basisValuesLeft.vectorData().numComponents() : 1; for (int leftVectorComponentOrdinal = 0; leftVectorComponentOrdinal < leftVectorComponentCount; leftVectorComponentOrdinal++) { - TensorData leftComponent = isVectorValued ? basisValuesLeft.vectorData().getComponent(leftFamilyOrdinal, leftVectorComponentOrdinal) - : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal); + TensorData leftComponent = leftIsVectorValued ? 
basisValuesLeft.vectorData().getComponent(leftFamilyOrdinal, leftVectorComponentOrdinal) + : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal); if (!leftComponent.isValid()) { a_offset++; // empty components are understood to take up one dimension @@ -2061,11 +2089,11 @@ void IntegrationTools::integrate(Data integrals, int b_offset = 0; // right vector component offset int rightFieldOffset = basisValuesRight.vectorData().familyFieldOrdinalOffset(rightFamilyOrdinal); - const int rightVectorComponentCount = isVectorValued ? basisValuesRight.vectorData().numComponents() : 1; + const int rightVectorComponentCount = rightIsVectorValued ? basisValuesRight.vectorData().numComponents() : 1; for (int rightVectorComponentOrdinal = 0; rightVectorComponentOrdinal < rightVectorComponentCount; rightVectorComponentOrdinal++) { - TensorData rightComponent = isVectorValued ? basisValuesRight.vectorData().getComponent(rightFamilyOrdinal, rightVectorComponentOrdinal) - : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal); + TensorData rightComponent = rightIsVectorValued ? basisValuesRight.vectorData().getComponent(rightFamilyOrdinal, rightVectorComponentOrdinal) + : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal); if (!rightComponent.isValid()) { b_offset++; // empty components are understood to take up one dimension @@ -2223,15 +2251,23 @@ void IntegrationTools::integrate(Data integrals, const bool transposeRight = false; // auto timer = Teuchos::TimeMonitor::getNewTimer("mat-mat"); // timer->start(); - // transforms can be matrices -- (C,P,D,D): rank 4 -- or scalar weights -- (C,P): rank 2 - const bool matrixTransform = (leftTransform.rank() == 4) || (rightTransform.rank() == 4); + // transforms can be matrices -- (C,P,D,D): rank 4 -- or scalar weights -- (C,P): rank 2 -- or vector weights -- (C,P,D): rank 3 Data composedTransform; // invalid/empty transforms are used when the identity is intended. 
+ const int leftRank = leftTransform.rank(); + const int rightRank = rightTransform.rank(); + if (leftTransform.isValid() && rightTransform.isValid()) { - if (matrixTransform) + const bool bothRank4 = (leftRank == 4) && (rightRank == 4); + const bool bothRank3 = (leftRank == 3) && (rightRank == 3); + const bool bothRank2 = (leftRank == 2) && (rightRank == 2); + const bool ranks32 = ((leftRank == 3) && (rightRank == 2)) || ((leftRank == 2) && (rightRank == 3)); + const bool ranks42 = ((leftRank == 4) && (rightRank == 2)) || ((leftRank == 2) && (rightRank == 4)); + + if (bothRank4) // (C,P,D,D) { - composedTransform = leftTransform.allocateMatMatResult(transposeLeft, leftTransform, transposeRight, rightTransform); + composedTransform = Data::allocateMatMatResult(transposeLeft, leftTransform, transposeRight, rightTransform); composedTransform.storeMatMat(transposeLeft, leftTransform, transposeRight, rightTransform); // if the composedTransform matrices are full, the following is a good estimate. If they have some diagonal portions, this will overcount. @@ -2240,12 +2276,41 @@ void IntegrationTools::integrate(Data integrals, *approximateFlops += composedTransform.getUnderlyingViewSize() * (spaceDim - 1) * 2; } } - else + else if (bothRank3) // (C,P,D) + { + // re-cast leftTransform as a rank 4 (C,P,1,D) object -- a 1 x D matrix at each (C,P). 
+ const int newRank = 4; + auto extents = leftTransform.getExtents(); + auto variationTypes = leftTransform.getVariationTypes(); + extents[3] = extents[2]; + extents[2] = 1; + variationTypes[3] = variationTypes[2]; + variationTypes[2] = CONSTANT; + auto leftTransformMatrix = leftTransform.shallowCopy(newRank, extents, variationTypes); + + // re-cast rightTransform as a rank 4 (C,P,1,D) object -- a 1 x D matrix at each (C,P) + extents = rightTransform.getExtents(); + variationTypes = rightTransform.getVariationTypes(); + extents[3] = extents[2]; + extents[2] = 1; + variationTypes[3] = variationTypes[2]; + variationTypes[2] = CONSTANT; + auto rightTransformMatrix = rightTransform.shallowCopy(newRank, extents, variationTypes); + + composedTransform = Data::allocateMatMatResult(transposeLeft, leftTransformMatrix, transposeRight, rightTransformMatrix); // false: don't transpose + composedTransform.storeMatMat(transposeLeft, leftTransformMatrix, transposeRight, rightTransformMatrix); + + if (approximateFlops != NULL) + { + *approximateFlops += composedTransform.getUnderlyingViewSize(); // one multiply per entry + } + } + else if (bothRank2) { composedTransform = leftTransform.allocateInPlaceCombinationResult(leftTransform, rightTransform); composedTransform.storeInPlaceProduct(leftTransform, rightTransform); - // re-cast composedTranform as a rank 4 (C,P,D,D) object -- a 1 x 1 matrix at each (C,P). + // re-cast composedTranform as a rank 4 (C,P,1,1) object -- a 1 x 1 matrix at each (C,P). const int newRank = 4; auto extents = composedTransform.getExtents(); auto variationTypes = composedTransform.getVariationTypes(); @@ -2255,16 +2320,100 @@ void IntegrationTools::integrate(Data integrals, *approximateFlops += composedTransform.getUnderlyingViewSize(); // one multiply per entry } } + else if (ranks32) // rank 2 / rank 3 combination. + { + const auto & rank3Transform = (leftRank == 3) ? leftTransform : rightTransform; + const auto & rank2Transform = (leftRank == 2) ? 
leftTransform : rightTransform; + + composedTransform = DataTools::multiplyByCPWeights(rank3Transform, rank2Transform); + + // re-cast composedTransform as a rank 4 object: + // logically, the original rank-3 transform can be understood as a 1xD matrix. The composed transform is leftTransform^T * rightTransform, so: + // - if left has the rank-3 transform, composedTransform should be a (C,P,D,1) object -- a D x 1 matrix at each (C,P). + // - if right has the rank-3 transform, composedTransform should be a (C,P,1,D) object -- a 1 x D matrix at each (C,P). + const int newRank = 4; + auto extents = composedTransform.getExtents(); + auto variationTypes = composedTransform.getVariationTypes(); + if (leftRank == 3) + { + // extents[3] and variationTypes[3] will already be 1 and CONSTANT, respectively + // extents[3] = 1; + // variationTypes[3] = CONSTANT; + } + else + { + extents[3] = extents[2]; + extents[2] = 1; + variationTypes[3] = variationTypes[2]; + variationTypes[2] = CONSTANT; + } + composedTransform = composedTransform.shallowCopy(newRank, extents, variationTypes); + } + else if (ranks42) // rank 4 / rank 2 combination. 
+ { + if (leftRank == 4) + { + // want to transpose left matrix, and multiply by the values from rightTransform + // start with the multiplication: + auto composedTransformTransposed = DataTools::multiplyByCPWeights(leftTransform, rightTransform); + composedTransform = DataTools::transposeMatrix(composedTransformTransposed); + } + else // (leftRank == 2) + { + composedTransform = DataTools::multiplyByCPWeights(rightTransform, leftTransform); + } + } + else + { + INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported transform combination"); + } } else if (leftTransform.isValid()) { // rightTransform is the identity - composedTransform = leftTransform; + switch (leftRank) + { + case 4: composedTransform = DataTools::transposeMatrix(leftTransform); break; + case 3: + { + // - if left has the rank-3 transform, composedTransform should be a (C,P,D,1) object -- a D x 1 matrix at each (C,P). + const int newRank = 4; + auto extents = leftTransform.getExtents(); + auto variationTypes = leftTransform.getVariationTypes(); + + composedTransform = leftTransform.shallowCopy(newRank, extents, variationTypes); + } + break; + case 2: composedTransform = leftTransform; break; + default: + INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported transform combination"); + } } else if (rightTransform.isValid()) { // leftTransform is the identity composedTransform = rightTransform; + switch (rightRank) + { + case 4: composedTransform = rightTransform; break; + case 3: + { + // - if right has the rank-3 transform, composedTransform should be a (C,P,1,D) object -- a 1 x D matrix at each (C,P). 
+ const int newRank = 4; + auto extents = rightTransform.getExtents(); + auto variationTypes = rightTransform.getVariationTypes(); + extents[3] = extents[2]; + variationTypes[3] = variationTypes[2]; + extents[2] = 1; + variationTypes[2] = CONSTANT; + + composedTransform = rightTransform.shallowCopy(newRank, extents, variationTypes); + } + break; + case 2: composedTransform = rightTransform; break; + default: + INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported transform combination"); + } } else { @@ -2283,8 +2432,8 @@ void IntegrationTools::integrate(Data integrals, const int leftFamilyCount = basisValuesLeft. basisValues().numFamilies(); const int rightFamilyCount = basisValuesRight.basisValues().numFamilies(); - const int leftComponentCount = isVectorValued ? basisValuesLeft. vectorData().numComponents() : 1; - const int rightComponentCount = isVectorValued ? basisValuesRight.vectorData().numComponents() : 1; + const int leftComponentCount = leftIsVectorValued ? basisValuesLeft. vectorData().numComponents() : 1; + const int rightComponentCount = rightIsVectorValued ? basisValuesRight.vectorData().numComponents() : 1; int leftFieldOrdinalOffset = 0; // keeps track of the number of fields in prior families for (int leftFamilyOrdinal=0; leftFamilyOrdinal::integrate(Data integrals, bool haveLaunchedContributionToCurrentFamilyLeft = false; // helps to track whether we need a Kokkos::fence before launching a kernel. for (int leftComponentOrdinal=0; leftComponentOrdinal leftComponent = isVectorValued ? basisValuesLeft.vectorData().getComponent(leftFamilyOrdinal, leftComponentOrdinal) - : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal); + TensorData leftComponent = leftIsVectorValued ? 
basisValuesLeft.vectorData().getComponent(leftFamilyOrdinal, leftComponentOrdinal) + : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal); if (!leftComponent.isValid()) { // represents zero @@ -2313,8 +2462,8 @@ void IntegrationTools::integrate(Data integrals, int b_offset = 0; for (int rightComponentOrdinal=0; rightComponentOrdinal rightComponent = isVectorValued ? basisValuesRight.vectorData().getComponent(rightFamilyOrdinal, rightComponentOrdinal) - : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal); + TensorData rightComponent = rightIsVectorValued ? basisValuesRight.vectorData().getComponent(rightFamilyOrdinal, rightComponentOrdinal) + : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal); if (!rightComponent.isValid()) { // represents zero @@ -2416,13 +2565,13 @@ void IntegrationTools::integrate(Data integrals, } } } - b_offset += isVectorValued ? basisValuesRight.vectorData().numDimsForComponent(rightComponentOrdinal) : 1; + b_offset += rightIsVectorValued ? basisValuesRight.vectorData().numDimsForComponent(rightComponentOrdinal) : 1; } - rightFieldOrdinalOffset += isVectorValued ? basisValuesRight.vectorData().numFieldsInFamily(rightFamilyOrdinal) : basisValuesRight.basisValues().numFieldsInFamily(rightFamilyOrdinal); + rightFieldOrdinalOffset += rightIsVectorValued ? basisValuesRight.vectorData().numFieldsInFamily(rightFamilyOrdinal) : basisValuesRight.basisValues().numFieldsInFamily(rightFamilyOrdinal); } - a_offset += isVectorValued ? basisValuesLeft.vectorData().numDimsForComponent(leftComponentOrdinal) : 1; + a_offset += leftIsVectorValued ? basisValuesLeft.vectorData().numDimsForComponent(leftComponentOrdinal) : 1; } - leftFieldOrdinalOffset += isVectorValued ? basisValuesLeft.vectorData().numFieldsInFamily(leftFamilyOrdinal) : basisValuesLeft.basisValues().numFieldsInFamily(leftFamilyOrdinal); + leftFieldOrdinalOffset += leftIsVectorValued ? 
basisValuesLeft.vectorData().numFieldsInFamily(leftFamilyOrdinal) : basisValuesLeft.basisValues().numFieldsInFamily(leftFamilyOrdinal); } } // if (approximateFlops != NULL) diff --git a/packages/intrepid2/src/Shared/Intrepid2_Data.hpp b/packages/intrepid2/src/Shared/Intrepid2_Data.hpp index 6c7db78d673d..67a713151ada 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_Data.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_Data.hpp @@ -34,7 +34,7 @@ namespace Intrepid2 { \class Intrepid2::ZeroView \brief A singleton class for a DynRankView containing exactly one zero entry. (Technically, the entry is DataScalar(), the default value for the scalar type.) This allows View-wrapping classes to return a reference to zero, even when that zero is not explicitly stored in the wrapped views. -This is used by Interpid2::Data for its getEntry() and getWritableEntry() methods. +This is used by Intrepid2::Data for its getEntry() and getWritableEntry() methods. \note There is no protection against the zero value being overwritten; perhaps we should add some (i.e., const-qualify DataScalar). Because of implementation details in Intrepid2::Data, we don't do so yet. 
*/ @@ -1490,43 +1490,37 @@ class ZeroView { resultExtents[i] = 1; } - ScalarView data; + ScalarView data; // new view will match this one in layout and fad dimension, if any + auto viewToMatch = A_MatData.getUnderlyingView(); if (resultNumActiveDims == 1) { - auto viewToMatch = A_MatData.getUnderlyingView1(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0]); } else if (resultNumActiveDims == 2) { - auto viewToMatch = A_MatData.getUnderlyingView2(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1]); } else if (resultNumActiveDims == 3) { - auto viewToMatch = A_MatData.getUnderlyingView3(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2]); } else if (resultNumActiveDims == 4) { - auto viewToMatch = A_MatData.getUnderlyingView4(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2], resultDataDims[3]); } else if (resultNumActiveDims == 5) { - auto viewToMatch = A_MatData.getUnderlyingView5(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2], resultDataDims[3], resultDataDims[4]); } else if (resultNumActiveDims == 6) { - auto viewToMatch = A_MatData.getUnderlyingView6(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2], resultDataDims[3], resultDataDims[4], resultDataDims[5]); } else 
// resultNumActiveDims == 7 { - auto viewToMatch = A_MatData.getUnderlyingView7(); // new view will match this one in layout and fad dimension, if any data = getMatchingViewWithLabel(viewToMatch, "Data mat-mat result", resultDataDims[0], resultDataDims[1], resultDataDims[2], resultDataDims[3], resultDataDims[4], resultDataDims[5], resultDataDims[6]); } @@ -1534,6 +1528,37 @@ class ZeroView { return Data(data,resultRank,resultExtents,resultVariationTypes,resultBlockPlusDiagonalLastNonDiagonal); } + //! Constructs a container suitable for storing the result of a contraction over the final dimensions of the two provided containers. The two containers must have the same logical shape. + //! \see storeInPlaceCombination() + //! \param A [in] - the first data container. + //! \param B [in] - the second data container. Must have the same logical shape as A. + //! \param numContractionDims [in] - the number of dimensions over which the contraction should take place. + //! \return A numContractionDims-rank-lower container with the same logical shape as A and B in all but the last dimensions. + static Data allocateContractionResult( const Data &A, const Data &B, const int &numContractionDims ) + { + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.rank() != B.rank(), std::invalid_argument, "A and B must have the same logical shape"); + const int rank = A.rank(); + const int resultRank = rank - numContractionDims; + std::vector dimInfo(resultRank); + for (int d=0; d result(dimInfo); + return result; + } + + //! Constructs a container suitable for storing the result of a contraction over the final dimension of the two provided containers. The two containers must have the same logical shape. + //! \see storeInPlaceCombination() + //! \param A [in] - the first data container. + //! \param B [in] - the second data container. Must have the same logical shape as A. + //! \return A 1-rank-lower container with the same logical shape as A and B in all but the last dimension. 
+ static Data allocateDotProductResult( const Data &A, const Data &B ) + { + return allocateContractionResult(A, B, 1); + } + //! Constructs a container suitable for storing the result of a matrix-vector multiply corresponding to the two provided containers. //! \see storeMatVec() static Data allocateMatVecResult( const Data &matData, const Data &vecData, const bool transposeMatrix = false ) @@ -1618,10 +1643,8 @@ class ZeroView { } // for the final dimension, the variation type is always GENERAL // (Some combinations, e.g. CONSTANT/CONSTANT *would* generate a CONSTANT result, but constant matrices don't make a lot of sense beyond 1x1 matrices…) - resultVariationTypes[resultNumActiveDims] = GENERAL; resultActiveDims[resultNumActiveDims] = resultRank - 1; resultDataDims[resultNumActiveDims] = rows; - resultExtents[resultRank-1] = rows; resultNumActiveDims++; for (int i=resultRank; i<7; i++) @@ -1629,6 +1652,8 @@ class ZeroView { resultVariationTypes[i] = CONSTANT; resultExtents[i] = 1; } + resultVariationTypes[resultRank-1] = GENERAL; + resultExtents[resultRank-1] = rows; ScalarView data; if (resultNumActiveDims == 1) @@ -1730,6 +1755,64 @@ class ZeroView { } } + //! Places the result of a contraction along the final dimension of A and B into this data container. 
+ void storeDotProduct(const Data &A, const Data &B) + { + const int D_DIM = A.rank() - 1; + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(A.extent_int(D_DIM) != B.extent_int(D_DIM), std::invalid_argument, "A and B have different extents"); + const int vectorComponents = A.extent_int(D_DIM); + + // shallow copy of this to avoid implicit references to this in call to getWritableEntry() below + Data thisData = *this; + + using ExecutionSpace = typename DeviceType::execution_space; + // note the use of getDataExtent() below: we only range over the possibly-distinct entries + if (rank_ == 1) // contraction result rank; e.g., (P) + { + Kokkos::parallel_for("compute dot product", getDataExtent(0), + KOKKOS_LAMBDA (const int &pointOrdinal) { + auto & val = thisData.getWritableEntry(pointOrdinal); + val = 0; + for (int i=0; i>({0,0},{getDataExtent(0),getDataExtent(1)}); + Kokkos::parallel_for("compute dot product", policy, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) { + auto & val = thisData.getWritableEntry(cellOrdinal, pointOrdinal); + val = 0; + for (int i=0; i>({0,0,0},{getDataExtent(0),getDataExtent(1),getDataExtent(2)}); + Kokkos::parallel_for("compute dot product", policy, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal, const int &d) { + auto & val = thisData.getWritableEntry(cellOrdinal, pointOrdinal,d); + val = 0; + for (int i=0; i void storeInPlaceCombination(const Data &A, const Data &B, BinaryOperator binaryOperator); @@ -1909,7 +1992,7 @@ class ZeroView { { Kokkos::parallel_for("compute mat-mat", policy, KOKKOS_LAMBDA (const int &cellOrdinal, const int &pointOrdinal) { - for (int i=0; i static void multiplyByCPWeights(Data &resultMatrixData, const Data &matrixDataIn, const Data &scalarDataIn) { - const ordinal_type rank = scalarDataIn.rank(); - auto extents = scalarDataIn.getExtents(); - auto variationTypes = scalarDataIn.getVariationTypes(); - extents[rank] = matrixDataIn.extent_int(rank); - extents[rank+1] = 
matrixDataIn.extent_int(rank+1); - variationTypes[rank] = CONSTANT; - variationTypes[rank+1] = CONSTANT; + const ordinal_type rank = scalarDataIn.rank(); + const ordinal_type matrixRank = matrixDataIn.rank(); + auto extents = scalarDataIn.getExtents(); + auto variationTypes = scalarDataIn.getVariationTypes(); + for (int r=rank; r static Data multiplyByCPWeights(const Data &matrixDataIn, const Data &scalarDataIn) { - const ordinal_type rank = scalarDataIn.rank(); - auto extents = scalarDataIn.getExtents(); - auto variationTypes = scalarDataIn.getVariationTypes(); - extents[rank] = matrixDataIn.extent_int(rank); - extents[rank+1] = matrixDataIn.extent_int(rank+1); - variationTypes[rank] = CONSTANT; - variationTypes[rank+1] = CONSTANT; + const ordinal_type rank = scalarDataIn.rank(); + const ordinal_type matrixRank = matrixDataIn.rank(); + auto extents = scalarDataIn.getExtents(); + auto variationTypes = scalarDataIn.getVariationTypes(); + for (int r=rank; r::allocateInPlaceCombinationResult(scalarDataInExtended, matrixDataIn); result.storeInPlaceProduct(matrixDataIn,scalarDataInExtended); return result; } + + //! Allocates and fills Data object corresponding to the transpose of matrix data, represented by the last two dimensions of the input object. + //! \param matrixDataIn [in] - the (…,D1,D2) container. + //! \return a (…,D2,D1) container containing the transpose of the input matrix data. + template + static Data transposeMatrix(const Data &matrixDataIn) + { + // A direct construction of the transpose could be more efficient, but here we take advantage of existing + // implementations within the Data class supporting matrix-matrix multiplication. We construct an identity + // matrix, and left-multiply this by the transpose of the input matrix. 
+ const ordinal_type rank = matrixDataIn.rank(); + auto extents = matrixDataIn.getExtents(); + auto variationTypes = matrixDataIn.getVariationTypes(); + const auto D1 = extents[rank-2]; + + extents[rank-2] = D1; + extents[rank-1] = D1; + variationTypes[rank-2] = BLOCK_PLUS_DIAGONAL; + variationTypes[rank-1] = BLOCK_PLUS_DIAGONAL; + + Kokkos::View identityUnderlyingView("Intrepid2::DataTools::transposeMatrix() - identity view",D1); + Kokkos::deep_copy(identityUnderlyingView, 1.0); + Data identityData(identityUnderlyingView,extents,variationTypes); + + auto result = Data::allocateMatMatResult(true, matrixDataIn, false, identityData); + result.storeMatMat(true, matrixDataIn, false, identityData); + + return result; + } }; } diff --git a/packages/intrepid2/src/Shared/Intrepid2_TransformedBasisValues.hpp b/packages/intrepid2/src/Shared/Intrepid2_TransformedBasisValues.hpp index bc6250fed912..b177617fd448 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_TransformedBasisValues.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_TransformedBasisValues.hpp @@ -27,7 +27,7 @@ namespace Intrepid2 { /** \class Intrepid2::TransformedBasisValues \brief Structure-preserving representation of transformed vector data; reference space values and transformations are stored separately. - TransformedBasisValues provides a View-like interface of rank 4, with shape (C,F,P,D). When the corresponding accessor is used, the transformed value is determined from corresponding reference space values and the transformation. + TransformedBasisValues provides a View-like interface of rank 3 or 4, with shape (C,F,P) or (C,F,P,D). When the corresponding accessor is used, the transformed value is determined from corresponding reference space values and the transformation. */ template class TransformedBasisValues @@ -35,13 +35,13 @@ namespace Intrepid2 { public: ordinal_type numCells_; - Data transform_; // vector case: (C,P,D,D) jacobian or jacobian inverse; can also be unset for identity transform. 
Scalar case: (C,P), or unset for identity. + Data transform_; // vector case: (C,P,D,D) jacobian or jacobian inverse; can also be unset for identity transform. Scalar case: (C,P), or unset for identity. Contracted vector case: (C,P,D) transform, to be contracted with a vector field to produce a scalar result. BasisValues basisValues_; /** \brief Standard constructor. - \param [in] transform - the transformation (matrix), with logical shape (C,P) or (C,P,D,D) + \param [in] transform - the transformation (matrix), with logical shape (C,P), (C,P,D), or (C,P,D,D) \param [in] basisValues - the reference-space data to be transformed, with logical shape (F,P) (for scalar values) or (F,P,D) (for vector values) */ TransformedBasisValues(const Data &transform, const BasisValues &basisValues) @@ -52,6 +52,7 @@ namespace Intrepid2 { { // sanity check: when transform is diagonal, we expect there to be no pointwise variation. INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE(transform_.isDiagonal() && (transform_.getVariationTypes()[1] != CONSTANT), std::invalid_argument, "When transform is diagonal, we assume in various places that there is no pointwise variation; the transform_ Data should have CONSTANT as its variation type in dimension 1."); + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE((transform_.rank() < 2) || (transform_.rank() > 4), std::invalid_argument, "Only transforms of rank 2, 3, or 4 are supported"); } /** @@ -129,7 +130,7 @@ namespace Intrepid2 { } else { - if (transform_.rank() == 4) + if ((transform_.rank() == 4) || (transform_.rank() == 3)) { transform_ = DataTools::multiplyByCPWeights(transform_,weightData); } @@ -164,7 +165,22 @@ namespace Intrepid2 { //! Returns the logical extent in the space dimension, which is the 3 dimension in this container. 
KOKKOS_INLINE_FUNCTION int spaceDim() const { - return basisValues_.extent_int(2); + if ((transform_.rank() == 3) && (basisValues_.rank() == 3)) // (C,P,D) contracted in D against (F,P,D) + { + return 1; // spaceDim contracted away + } + else if ((transform_.rank() == 3) && (basisValues_.rank() == 2)) // (C,P,D) weighting (F,P) + { + return transform_.extent_int(2); + } + else if (transform_.isValid()) + { + return transform_.extent_int(2); + } + else + { + return basisValues_.extent_int(2); + } } //! Scalar accessor, with arguments (C,F,P). @@ -175,10 +191,20 @@ namespace Intrepid2 { // null transform is understood as the identity return basisValues_(fieldOrdinal,pointOrdinal); } - else + else if (transform_.rank() == 2) { return transform_(cellOrdinal,pointOrdinal) * basisValues_(fieldOrdinal,pointOrdinal); } + else if (transform_.rank() == 3) + { + Scalar value = 0; + for (int d=0; d("vector2", cellCount, pointCount, spaceDim); + auto vector2ViewHost = Kokkos::create_mirror(vector2View); + vector2ViewHost(0,0,0) = 3.0; + vector2ViewHost(0,0,1) = 2.0; + Kokkos::deep_copy(vector2View, vector2ViewHost); + + Data u_data(vectorView); + Data A_data(matrixView); + Data v_data(vector2View); + + auto AvResultData = Data::allocateMatVecResult(A_data, v_data, false); + AvResultData.storeMatVec(A_data, v_data, false); + + auto upAvResultData = Data::allocateDotProductResult(u_data, AvResultData); + upAvResultData.storeDotProduct(u_data, AvResultData); + + auto ApuResultData = Data::allocateMatVecResult(A_data, u_data, true); + ApuResultData.storeMatVec(A_data, u_data, true); + + auto vpAuResultData = Data::allocateDotProductResult(v_data, ApuResultData); + vpAuResultData.storeDotProduct(v_data, ApuResultData); + + testFloatingEquality2(upAvResultData, vpAuResultData, relTol, absTol, out, success); + printView(upAvResultData.getUnderlyingView2(), out); + printView(vpAuResultData.getUnderlyingView2(), out); } // #pragma mark Data: MatMat @@ -576,6 +605,82 @@ namespace 
printView(actualResultData.getUnderlyingView3(), out); } +/** \brief Data provides matrix-matrix multiplication support. This method checks correctness of the computed mat-mat for a case arising from taking the outer product of two vectors. +*/ + TEUCHOS_UNIT_TEST( Data, MatMatOuterProduct ) + { + double relTol = 1e-13; + double absTol = 1e-13; + + using DeviceType = DefaultTestDeviceType; + using Scalar = double; + const int spaceDim = 2; + const int cellCount = 1; + const int pointCount = 1; + auto leftVectorView = getView("left vector", cellCount, pointCount, spaceDim); + auto leftVectorViewHost = Kokkos::create_mirror(leftVectorView); + leftVectorViewHost(0,0,0) = 1.0; + leftVectorViewHost(0,0,1) = 0.5; + Kokkos::deep_copy(leftVectorView, leftVectorViewHost); + + Data leftVector(leftVectorView); + + auto rightVectorView = getView("right vector", cellCount, pointCount, spaceDim); + auto rightVectorViewHost = Kokkos::create_mirror(rightVectorView); + rightVectorViewHost(0,0,0) = 0.5; + rightVectorViewHost(0,0,1) = 1.0; + Kokkos::deep_copy(rightVectorView, rightVectorViewHost); + Data rightVector(rightVectorView); + + // re-cast leftVector as a rank 4 (C,P,D,1) object -- a D x 1 matrix at each (C,P). 
+ const int newRank = 4; + auto extents = leftVector.getExtents(); + auto variationTypes = leftVector.getVariationTypes(); + auto leftMatrix = leftVector.shallowCopy(newRank, extents, variationTypes); + + // re-cast rightVector as a rank 4 (C,P,1,D) object -- a 1 x D matrix at each (C,P) + extents = rightVector.getExtents(); + extents[3] = extents[2]; + extents[2] = 1; + variationTypes = rightVector.getVariationTypes(); + variationTypes[3] = variationTypes[2]; + variationTypes[2] = CONSTANT; + auto rightMatrix = rightVector.shallowCopy(newRank, extents, variationTypes); + + auto expectedResultView = getView("result matrix", cellCount, pointCount, spaceDim, spaceDim); + auto expectedResultViewHost = Kokkos::create_mirror(expectedResultView); + + const int cellOrdinal = 0; + for (int i=0; i::allocateMatMatResult(transposeA, leftMatrix, transposeB, rightMatrix); + + TEST_EQUALITY( 4, actualResultData.rank()); + TEST_EQUALITY( cellCount, actualResultData.extent_int(0)); + TEST_EQUALITY(pointCount, actualResultData.extent_int(1)); + TEST_EQUALITY( spaceDim, actualResultData.extent_int(2)); + TEST_EQUALITY( spaceDim, actualResultData.extent_int(3)); + + actualResultData.storeMatMat(transposeA, leftMatrix, transposeB, rightMatrix); + + testFloatingEquality4(expectedResultView, actualResultData, relTol, absTol, out, success); + + printView(actualResultData.getUnderlyingView(), out); + } + // #pragma mark Data: MatMatExplicitIdentity_PDD /** \brief Data provides matrix-matrix multiplication support. This method checks correctness of the computed mat-mat for several cases involving 3x3 identity matrices. Here, the logical dimensions (C,P,D,D) differ from the stored dimensions of (P,D,D). We test each possible transpose combination. 
*/ @@ -725,6 +830,48 @@ TEUCHOS_UNIT_TEST( Data, MatMatExplicitIdentity_PDD ) // (P,D,D) underlying; not printView(actualResultData.getUnderlyingView2(), out); } + +// #pragma mark Data: VecDotProduct +/** \brief Data provides vector dot product multiplication support. This method checks correctness of the computed dot product for a particular case involving 2x1 vectors. +*/ + TEUCHOS_UNIT_TEST( Data, VecDotProduct ) + { + double relTol = 1e-13; + double absTol = 1e-13; + + using DeviceType = DefaultTestDeviceType; + using Scalar = double; + const int numCells = 1; + const int spaceDim = 2; + + auto vec1View = getView("vector", numCells, spaceDim); + auto vec1ViewHost = Kokkos::create_mirror(vec1View); + + vec1ViewHost(0,0) = 1.0; + vec1ViewHost(0,1) = 2.0; + Kokkos::deep_copy(vec1View, vec1ViewHost); + + auto vec2View = getView("vector", numCells, spaceDim); + auto vec2ViewHost = Kokkos::create_mirror(vec1View); + + vec2ViewHost(0,0) = 3.0; + vec2ViewHost(0,1) = 2.0; + Kokkos::deep_copy(vec2View, vec2ViewHost); + + auto expectedResultView = getView("result",numCells); + auto expectedResultViewHost = Kokkos::create_mirror(expectedResultView); + + expectedResultViewHost(0) = vec1ViewHost(0,0) * vec2ViewHost(0,0) + vec1ViewHost(0,1) * vec2ViewHost(0,1); + + Kokkos::deep_copy(expectedResultView, expectedResultViewHost); + + Data vec1Data(vec1View); + Data vec2Data(vec2View); + auto actualResultData = Data::allocateDotProductResult(vec1Data, vec2Data); + actualResultData.storeDotProduct(vec1Data, vec2Data); + + testFloatingEquality1(expectedResultView, actualResultData.getUnderlyingView1(), relTol, absTol, out, success); + } // test statically that Data supports all 7 rank operators static_assert(supports_rank,1>::value, "Data is expected to support up to rank 7"); diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStandardIntegration.cpp 
b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStandardIntegration.cpp index 257b700bab2f..fd5672916aad 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStandardIntegration.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStandardIntegration.cpp @@ -83,6 +83,7 @@ void testStandardIntegration(int meshWidth, int polyOrder, int worksetSize, EFunctionSpace fs; EOperator op1, op2; int numOps = 0; // can be 1 or 2 + Teuchos::RCP> vectorWeight1, vectorWeight2; switch (formulation) { case Poisson: @@ -113,12 +114,32 @@ void testStandardIntegration(int meshWidth, int polyOrder, int worksetSize, op1 = EOperator::OPERATOR_VALUE; fs = EFunctionSpace::FUNCTION_SPACE_HDIV; break; + case VectorWeightedPoisson: + numOps = 1; + op1 = EOperator::OPERATOR_GRAD; + fs = EFunctionSpace::FUNCTION_SPACE_HGRAD; + vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d(geometry, worksetSize, - polyOrder, fs, op1, - polyOrder, fs, op1, + polyOrder, fs, op1, vectorWeight1, + polyOrder, fs, op1, vectorWeight2, flopCountIntegration, flopCountJacobian); if (numOps == 2) { @@ -136,7 +157,7 @@ void testStandardIntegration(int meshWidth, int polyOrder, int worksetSize, }); } - auto specificIntegrals = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian); + auto specificIntegrals = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian, vectorWeight1, vectorWeight2); out << "Comparing new general standard assembly implementation to previous formulation-specific integration path…\n"; testFloatingEquality3(generalIntegrals, specificIntegrals, relTol, absTol, out, success, "general integral", "specific formulation integral"); @@ -167,4 +188,8 @@ 
TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardInteg TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, PoissonFormulation, D2, P3) TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, PoissonFormulation, D3, P3) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, VectorWeightedPoissonFormulation, D1, P1) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, VectorWeightedPoissonFormulation, D2, P3) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStandardIntegration, VectorWeightedPoissonFormulation, D3, P3) + } // anonymous namespace diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStructuredIntegration.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStructuredIntegration.cpp index 7e67f3b15579..7d12fe961809 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStructuredIntegration.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_GeneralStructuredIntegration.cpp @@ -83,6 +83,7 @@ void testStructuredIntegration(int meshWidth, int polyOrder, int worksetSize, EFunctionSpace fs; EOperator op1, op2; int numOps = 0; // can be 1 or 2 + Teuchos::RCP> vectorWeight1, vectorWeight2; switch (formulation) { case Poisson: @@ -113,12 +114,32 @@ void testStructuredIntegration(int meshWidth, int polyOrder, int worksetSize, op1 = EOperator::OPERATOR_VALUE; fs = EFunctionSpace::FUNCTION_SPACE_HDIV; break; + case VectorWeightedPoisson: + numOps = 1; + op1 = EOperator::OPERATOR_GRAD; + fs = EFunctionSpace::FUNCTION_SPACE_HGRAD; + vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d(geometry, worksetSize, - polyOrder, fs, op1, - polyOrder, 
fs, op1, + polyOrder, fs, op1, vectorWeight1, + polyOrder, fs, op1, vectorWeight2, flopCountIntegration, flopCountJacobian); if (numOps == 2) { @@ -136,7 +157,7 @@ void testStructuredIntegration(int meshWidth, int polyOrder, int worksetSize, }); } - auto specificIntegrals = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian); + auto specificIntegrals = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian, vectorWeight1, vectorWeight2); out << "Comparing new general standard assembly implementation to previous formulation-specific integration path…\n"; testFloatingEquality3(generalIntegrals, specificIntegrals, relTol, absTol, out, success, "general integral", "specific formulation integral"); @@ -167,4 +188,8 @@ TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredInt TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, PoissonFormulation, D2, P3) TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, PoissonFormulation, D3, P3) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, VectorWeightedPoissonFormulation, D1, P1) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, VectorWeightedPoissonFormulation, D2, P3) +TEUCHOS_UNIT_TEST_TEMPLATE_3_INSTANT(StructuredIntegration, GeneralStructuredIntegration, VectorWeightedPoissonFormulation, D3, P3) + } // anonymous namespace diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_QuadratureUniformMesh.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_QuadratureUniformMesh.cpp index cad8b2a13534..ab1e182c0417 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_QuadratureUniformMesh.cpp +++ 
b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_QuadratureUniformMesh.cpp @@ -74,11 +74,31 @@ namespace gridCellCounts[d] = meshWidth; } + Teuchos::RCP> vectorWeight1, vectorWeight2; + if (formulation == VectorWeightedPoisson) + { + vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d(algorithm, gridCellCounts); double flopCountIntegration = 0, flopCountJacobian = 0; - auto standardIntegrals = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian); + auto standardIntegrals = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian, vectorWeight1, vectorWeight2); - auto structuredIntegrals = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian); + auto structuredIntegrals = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, flopCountIntegration, flopCountJacobian, vectorWeight1, vectorWeight2); out << "Comparing standard Intrepid2 integration to new integration path…\n"; testFloatingEquality3(standardIntegrals, structuredIntegrals, relTol, absTol, out, success, "standard Intrepid2 integral", "structured integral"); @@ -108,170 +128,179 @@ namespace // comparisons are to Standard algorithm, so we don't instantiate with Standard: // 1D, p=1 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, 
PoissonFormulation, UniformAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D1, P1) // 1D, p=2 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P2) - 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, 
P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D1, P2) // 1D, p=4 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P4) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, 
PoissonFormulation, AffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D1, P4) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D1, P4) // 2D, p=1 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P1) - 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, 
D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, 
UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D2, P1) // 2D, p=2 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P2) + 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D2, P2) // 2D, p=3 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, 
HdivFormulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D2, P3) + 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D2, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D2, P3) - // 3D, p=1 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P1) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P1) + // 3D, p=1 tests: + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P1) + 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P1) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, 
NonAffineTensorAlgorithm, D3, P1) // 3D, p=2 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P2) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P2) + 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P2) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D3, P2) // 3D, p=3 tests: - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, 
PoissonFormulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P3) - TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, AffineNonTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, PoissonFormulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, AffineNonTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HgradFormulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, AffineNonTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HdivFormulation, UniformAlgorithm, D3, P3) + 
TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, AffineNonTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, HcurlFormulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, NonAffineTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, AffineNonTensorAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, L2Formulation, UniformAlgorithm, D3, P3) + TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, QuadratureUniformMesh, VectorWeightedPoissonFormulation, NonAffineTensorAlgorithm, D3, P3) } // anonymous namespace diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_StructuredVersusStandard.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_StructuredVersusStandard.cpp index 28ffcb37b3bd..27059b43e728 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_StructuredVersusStandard.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_StructuredVersusStandard.cpp @@ -64,8 +64,8 @@ namespace template void testStandardVersusStructuredIntegration(const int &meshWidth, const int &worksetSize, - const EFunctionSpace &fs1, const EOperator &op1, const int &p1, - const EFunctionSpace &fs2, const EOperator &op2, const int &p2, + const 
EFunctionSpace &fs1, const EOperator &op1, const int &p1, Teuchos::RCP< Kokkos::Array > vectorWeight1, + const EFunctionSpace &fs2, const EOperator &op2, const int &p2, Teuchos::RCP< Kokkos::Array > vectorWeight2, const double &relTol, const double &absTol, Teuchos::FancyOStream &out, bool &success) { @@ -84,19 +84,32 @@ void testStandardVersusStructuredIntegration(const int &meshWidth, const int &wo double flopCountIntegration = 0, flopCountJacobian = 0; auto structuredIntegrals = performStructuredAssembly(geometry, worksetSize, - p1, fs1, op1, - p2, fs2, op2, + p1, fs1, op1, vectorWeight1, + p2, fs2, op2, vectorWeight2, flopCountIntegration, flopCountJacobian); auto standardIntegrals = performStandardAssembly(geometry, worksetSize, - p1, fs1, op1, - p2, fs2, op2, + p1, fs1, op1, vectorWeight1, + p2, fs2, op2, vectorWeight2, flopCountIntegration, flopCountJacobian); out << "Comparing general standard assembly to structured integration path…\n"; testFloatingEquality3(standardIntegrals, structuredIntegrals, relTol, absTol, out, success, "standard integral", "structured formulation integral"); } +template +void testStandardVersusStructuredIntegration(const int &meshWidth, const int &worksetSize, + const EFunctionSpace &fs1, const EOperator &op1, const int &p1, + const EFunctionSpace &fs2, const EOperator &op2, const int &p2, + const double &relTol, const double &absTol, + Teuchos::FancyOStream &out, bool &success) +{ + testStandardVersusStructuredIntegration(meshWidth, worksetSize, + fs1, op1, p1, Teuchos::null, + fs2, op2, p2, Teuchos::null, + relTol, absTol, out, success); +} + TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandard_D1_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) { using DataScalar = double; @@ -322,6 +335,381 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandar (meshWidth, worksetSize, fs1, op1, p1, fs2, op2, p2, relTol, absTol, out, success); } 
+TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorWeighted_D1_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 1; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 2; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P2_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + 
const int spaceDim = 2; + const int p1 = 2; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D1_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 1; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + Teuchos::RCP > vectorWeight1; // no vector weight on scalar term + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D2_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 2; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + Teuchos::RCP > vectorWeight1; // no vector weight on scalar term + auto vectorWeight2 = 
Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D3_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 3; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + Teuchos::RCP > vectorWeight1; // no vector weight on scalar term + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D1_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 1; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d > vectorWeight2; // no vector weight on scalar term + + using DeviceType = DefaultTestDeviceType; + using BasisFamily = DerivedNodalBasisFamily; + + const EFunctionSpace fs1 
= FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D2_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 2; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d > vectorWeight2; // no vector weight on scalar term + + using DeviceType = DefaultTestDeviceType; + using BasisFamily = DerivedNodalBasisFamily; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D3_P1_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 3; + const int p1 = 1; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + double weight = 1.0; + for (int d=0; d > vectorWeight2; // no vector weight on scalar term + + using DeviceType = DefaultTestDeviceType; + using BasisFamily = DerivedNodalBasisFamily; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + 
const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + +TEUCHOS_UNIT_TEST_TEMPLATE_4_DECL(StructuredIntegration, StructuredVersusStandardVectorWeighted_D3_P2_P1, FS1Tag, Op1Tag, FS2Tag, Op2Tag) +{ + using DataScalar = double; + using PointScalar = double; + const int meshWidth = 1; + const int spaceDim = 3; + const int p1 = 2; + const int p2 = 1; + const int worksetSize = meshWidth; + + auto vectorWeight1 = Teuchos::rcp(new Kokkos::Array); + auto vectorWeight2 = Teuchos::rcp(new Kokkos::Array); + + double weight = 1.0; + for (int d=0; d; + + const EFunctionSpace fs1 = FS1Tag::functionSpace; + const EFunctionSpace fs2 = FS2Tag::functionSpace; + const EOperator op1 = Op1Tag::op; + const EOperator op2 = Op2Tag::op; + + double relTol = 1e-12; + double absTol = 1e-12; + + testStandardVersusStructuredIntegration + (meshWidth, worksetSize, fs1, op1, p1, vectorWeight1, fs2, op2, p2, vectorWeight2, relTol, absTol, out, success); +} + // asymmetric tests (mostly -- a couple symmetric ones tossed in as sanity checks on the test itself) // 1D tests: H(grad) and H(vol) bases defined @@ -338,6 +726,17 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStan TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D1_P2_P1, HGRAD, VALUE, HGRAD, VALUE) TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D1_P2_P1, HVOL, VALUE, HGRAD, VALUE) +// 1D vector-weighted test +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D1_P1_P1, HGRAD, GRAD, HGRAD, GRAD) + +// 1D scalar against vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
StructuredVersusStandardScalarAgainstVectorDotVector_D1_P1_P1, HVOL, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D1_P1_P1, HGRAD, VALUE, HGRAD, GRAD) + +// 1D vector-weighted against scalar tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D1_P1_P1, HGRAD, GRAD, HVOL, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D1_P1_P1, HGRAD, GRAD, HGRAD, VALUE) + // 2D tests: curls of H(curl) are scalars. // p1, p1: TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D2_P1_P1, HGRAD, GRAD, HGRAD, GRAD) @@ -367,6 +766,22 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStan TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D2_P1_P2, HCURL, CURL, HVOL, VALUE) TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D2_P1_P2, HVOL, VALUE, HGRAD, VALUE) +// 2D vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P1_P1, HGRAD, GRAD, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P2_P1, HGRAD, GRAD, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P1_P1, HCURL, VALUE, HDIV, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D2_P2_P1, HCURL, VALUE, HDIV, VALUE) + +// 2D scalar against vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D2_P1_P1, HVOL, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
StructuredVersusStandardScalarAgainstVectorDotVector_D2_P1_P1, HGRAD, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D2_P1_P1, HGRAD, VALUE, HDIV, VALUE) + +// 2D vector-weighted against scalar tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D2_P1_P1, HGRAD, GRAD, HVOL, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D2_P1_P1, HGRAD, GRAD, HGRAD, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D2_P1_P1, HDIV, VALUE, HGRAD, VALUE) + // 3D tests: curls of H(curl) are vectors // p1, p1: TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D3_P1_P1, HGRAD, GRAD, HGRAD, GRAD) @@ -396,5 +811,19 @@ TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStan TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D3_P1_P2, HCURL, CURL, HDIV, VALUE) TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandard_D3_P1_P2, HVOL, VALUE, HGRAD, VALUE) +// 3D vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D3_P2_P1, HGRAD, GRAD, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D3_P2_P1, HCURL, VALUE, HDIV, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorWeighted_D3_P2_P1, HCURL, CURL, HGRAD, GRAD) + +// 3D scalar against vector-weighted tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D3_P1_P1, HVOL, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, 
StructuredVersusStandardScalarAgainstVectorDotVector_D3_P1_P1, HGRAD, VALUE, HGRAD, GRAD) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardScalarAgainstVectorDotVector_D3_P1_P1, HGRAD, VALUE, HDIV, VALUE) + +// 3D vector-weighted against scalar tests +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D3_P1_P1, HGRAD, GRAD, HVOL, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D3_P1_P1, HGRAD, GRAD, HGRAD, VALUE) +TEUCHOS_UNIT_TEST_TEMPLATE_4_INSTANT(StructuredIntegration, StructuredVersusStandardVectorDotVectorAgainstScalar_D3_P1_P1, HDIV, VALUE, HGRAD, VALUE) } // anonymous namespace diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_TagDefs.hpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_TagDefs.hpp index bb1ce87fd872..fbafa35407d4 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_TagDefs.hpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_TagDefs.hpp @@ -23,7 +23,8 @@ enum FormulationChoice Hgrad, // (grad, grad) + (value, value) Hdiv, // (div, div) + (value, value) Hcurl, // (curl, curl) + (value, value) - L2 // (value, value) + L2, // (value, value) + VectorWeightedPoisson // (a dot grad, b dot grad) }; enum AlgorithmChoice @@ -64,6 +65,10 @@ class L2Formulation { public: static const FormulationChoice formulation = L2; }; +class VectorWeightedPoissonFormulation { +public: + static const FormulationChoice formulation = VectorWeightedPoisson; +}; class StandardAlgorithm { public: diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_Utils.hpp b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_Utils.hpp index 799fa0135efe..513de612af3d 100644 --- 
a/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_Utils.hpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/StructuredIntegrationTests_Utils.hpp @@ -26,6 +26,8 @@ #include "HCURLStructuredAssembly.hpp" #include "HVOLStandardAssembly.hpp" #include "HVOLStructuredAssembly.hpp" +#include "VectorWeightedGRADGRADStandardAssembly.hpp" +#include "VectorWeightedGRADGRADStructuredAssembly.hpp" template< typename PointScalar, int spaceDim, typename DeviceType > inline @@ -65,10 +67,12 @@ CellGeometry getMesh(AlgorithmChoice algorith return uniformTensorGeometry; // this line should be unreachable; included to avoid compiler warnings from nvcc } -template +template // spaceDim and spaceDim2 should agree on value (differ on type) Intrepid2::ScalarView performStandardQuadrature(FormulationChoice formulation, - Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, - double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) + Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount, + Teuchos::RCP< Kokkos::Array > vectorWeight1 = Teuchos::null, + Teuchos::RCP< Kokkos::Array > vectorWeight2 = Teuchos::null) { switch (formulation) { @@ -82,15 +86,19 @@ Intrepid2::ScalarView performStandardQuadrature(FormulationCh return performStandardQuadratureHCURL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); case L2: return performStandardQuadratureHVOL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + case VectorWeightedPoisson: + return performStandardQuadratureVectorWeightedGRADGRAD(geometry, polyOrder, worksetSize, vectorWeight1, vectorWeight2, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported formulation"); } } -template +template 
// spaceDim and spaceDim2 should agree on value (differ on type) Intrepid2::ScalarView performStructuredQuadrature(FormulationChoice formulation, - Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, - double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) + Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount, + Teuchos::RCP< Kokkos::Array > vectorWeight1 = Teuchos::null, + Teuchos::RCP< Kokkos::Array > vectorWeight2 = Teuchos::null) { switch (formulation) { @@ -104,6 +112,8 @@ Intrepid2::ScalarView performStructuredQuadrature(Formulation return performStructuredQuadratureHCURL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); case L2: return performStructuredQuadratureHVOL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + case VectorWeightedPoisson: + return performStructuredQuadratureVectorWeightedGRADGRAD(geometry, polyOrder, worksetSize, vectorWeight1, vectorWeight2, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); default: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unsupported formulation"); } diff --git a/packages/intrepid2/unit-test/MonolithicExecutable/TransformedBasisValuesTests.cpp b/packages/intrepid2/unit-test/MonolithicExecutable/TransformedBasisValuesTests.cpp index d9c388910cfb..55772e28a89d 100644 --- a/packages/intrepid2/unit-test/MonolithicExecutable/TransformedBasisValuesTests.cpp +++ b/packages/intrepid2/unit-test/MonolithicExecutable/TransformedBasisValuesTests.cpp @@ -8,7 +8,7 @@ // @HEADER -/** \file TransformedVectorDataTests.cpp +/** \file TransformedBasisValuesTests.cpp \brief Tests against TransformedBasisValues. 
\author Created by Nate Roberts */ @@ -341,6 +341,195 @@ namespace testFloatingEquality4(transformedGradValues, transformedGradientData, relTol, absTol, out, success); } + // testVectorWeightedTransformation tests against a (C,P,D) transformation of a gradient field. + template + void testWeightedVectorTransformation(const int &polyOrder, const int &meshWidth, Teuchos::FancyOStream &out, bool &success) + { + using DeviceType = DefaultTestDeviceType; + using Scalar = double; + using PointScalar = double; + + const double relTol = 1e-12; + const double absTol = 1e-12; + + auto fs = Intrepid2::FUNCTION_SPACE_HGRAD; + + auto lineBasis = Intrepid2::getLineBasis< Intrepid2::NodalBasisFamily >(fs, polyOrder); + + int numFields_1D = lineBasis->getCardinality(); + + int numFields = 1; + int numHypercubes = 1; + for (int d=0; d >(); + shards::CellTopology cellTopo; + if (spaceDim == 1) cellTopo = shards::getCellTopologyData< shards::Line<> >(); + else if (spaceDim == 2) cellTopo = shards::getCellTopologyData< shards::Quadrilateral<> >(); + else if (spaceDim == 3) cellTopo = shards::getCellTopologyData< shards::Hexahedron<> >(); + + auto lineCubature = Intrepid2::DefaultCubatureFactory::create(lineTopo,polyOrder*2); + int numPoints_1D = lineCubature->getNumPoints(); + ScalarView lineCubaturePoints("line cubature points",numPoints_1D,1); + ScalarView lineCubatureWeights("line cubature weights", numPoints_1D); + + lineCubature->getCubature(lineCubaturePoints, lineCubatureWeights); + + // Allocate some intermediate containers + ScalarView lineBasisValues ("line basis values", numFields_1D, numPoints_1D ); + ScalarView lineBasisGradValues("line basis grad values", numFields_1D, numPoints_1D, 1); + + // for now, we use 1D values to build up the 2D or 3D gradients + // eventually, TensorBasis should offer a getValues() variant that returns tensor basis data + lineBasis->getValues(lineBasisValues, lineCubaturePoints, Intrepid2::OPERATOR_VALUE ); + 
lineBasis->getValues(lineBasisGradValues, lineCubaturePoints, Intrepid2::OPERATOR_GRAD ); + + // drop the trivial space dimension in line gradient values: + Kokkos::resize(lineBasisGradValues, numFields_1D, numPoints_1D); + + Kokkos::Array, spaceDim> vectorComponents; + + for (int d=0; d, spaceDim> gradComponent_d; + for (int d2=0; d2(lineBasisGradValues); + else gradComponent_d[d2] = Data(lineBasisValues); + } + vectorComponents[d] = TensorData(gradComponent_d); + } + VectorData gradientVectorData(vectorComponents, false); // false: not axis-aligned + BasisValues gradientValues(gradientVectorData); + + CellGeometry cellNodes = uniformCartesianMesh(1.0, meshWidth); + + // goal here is to do a vector-weighted Poisson; i.e. (f a_u \cdot grad u, a_v \cdot grad v) on each cell + + int pointsPerCell = 1; + for (int d=0; d::allocateJacobianDet(jacobian); + auto jacobianInv = CellTools::allocateJacobianInv(jacobian); + cellNodes.setJacobian( jacobian, pointsPerCell); + CellTools::setJacobianDet(jacobianDet, jacobian); + CellTools::setJacobianInv(jacobianInv, jacobian); + + auto auView = getView("a_u", spaceDim); + auto auViewHost = Kokkos::create_mirror(auView); + double weight = 1.0; + for (int d=0; d("a_v", spaceDim); + auto avViewHost = Kokkos::create_mirror(avView); + weight = 0.5; + for (int d=0; d au_data(auView, Kokkos::Array{numCells,pointsPerCell,spaceDim}, Kokkos::Array{CONSTANT,CONSTANT,GENERAL}); + Data av_data(avView, Kokkos::Array{numCells,pointsPerCell,spaceDim}, Kokkos::Array{CONSTANT,CONSTANT,GENERAL}); + + auto uTransform = Data::allocateMatVecResult(jacobianInv, au_data, true); + auto vTransform = Data::allocateMatVecResult(jacobianInv, av_data, true); + + uTransform.storeMatVec(jacobianInv, au_data, true); // true: transpose jacobianInv when multiplying + vTransform.storeMatVec(jacobianInv, av_data, true); // true: transpose jacobianInv when multiplying + + Intrepid2::TransformedBasisValues utransformedBasisGradients(uTransform, gradientValues); + 
Intrepid2::TransformedBasisValues vtransformedBasisGradients(vTransform, gradientValues); + + int numPoints = 1; + for (int d=0; d expanded_uTransformedGradValues("transformed a_u dot grad values", numCells, numFields, numPoints); + ScalarView expanded_vTransformedGradValues("transformed a_v dot grad values", numCells, numFields, numPoints); + + auto basis = Intrepid2::getBasis< Intrepid2::NodalBasisFamily >(cellTopo, fs, polyOrder); + + // Allocate some intermediate containers + ScalarView basisValues ("basis values", numFields, numPoints ); + ScalarView basisGradValues("basis grad values", numFields, numPoints, spaceDim); + + ScalarView transformedGradValues("transformed grad values", numCells, numFields, numPoints, spaceDim); + ScalarView transformedWeightedGradValues("transformed weighted grad values", numCells, numFields, numPoints, spaceDim); + + auto cubature = Intrepid2::DefaultCubatureFactory::create(cellTopo,polyOrder*2); + TEST_EQUALITY( numPoints, cubature->getNumPoints()); + ScalarView cubaturePoints("cubature points",numPoints,spaceDim); + ScalarView cubatureWeights("cubature weights", numPoints); + + cubature->getCubature(cubaturePoints, cubatureWeights); + + basis->getValues(basisValues, cubaturePoints, Intrepid2::OPERATOR_VALUE ); + basis->getValues(basisGradValues, cubaturePoints, Intrepid2::OPERATOR_GRAD ); + + const int numNodesPerCell = cellNodes.numNodesPerCell(); + ScalarView expandedCellNodes("expanded cell nodes",numCells,numNodesPerCell,spaceDim); + + using ExecutionSpace = typename DeviceType::execution_space; + auto policy = Kokkos::MDRangePolicy>({0,0},{numCells,numNodesPerCell}); + Kokkos::parallel_for("fill expanded cell nodes", policy, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &nodeOrdinal) + { + for (int d=0; d expandedJacobian("jacobian", numCells, numPoints, spaceDim, spaceDim); + ScalarView expandedJacobianInverse("jacobian inverse", numCells, numPoints, spaceDim, spaceDim); + + using CellTools = Intrepid2::CellTools; + 
using ExecutionSpace = typename DeviceType::execution_space; + using FunctionSpaceTools = Intrepid2::FunctionSpaceTools; + + CellTools::setJacobian(expandedJacobian, cubaturePoints, expandedCellNodes, cellTopo); + CellTools::setJacobianInv(expandedJacobianInverse, expandedJacobian); + + FunctionSpaceTools::HGRADtransformGRAD(transformedGradValues, expandedJacobianInverse, basisGradValues); + + auto policy3 = Kokkos::MDRangePolicy>({0,0,0},{numCells,numFields,numPoints}); + Kokkos::parallel_for("compute expanded_{u,v}TransformedGradValues", policy3, + KOKKOS_LAMBDA (const int &cellOrdinal, const int &fieldOrdinal, const int &pointOrdinal) + { + Scalar u_result = 0; + Scalar v_result = 0; + for (int d=0; d(polyOrder, meshWidth, out, success); } + + TEUCHOS_UNIT_TEST( TransformedBasisValues, TransformedWeightedVector_1D_p1 ) + { + const int spaceDim = 1; + const int polyOrder = 1; + const int meshWidth = 10; + testWeightedVectorTransformation(polyOrder, meshWidth, out, success); + } + + TEUCHOS_UNIT_TEST( TransformedBasisValues, TransformedWeightedVector_1D_p2 ) + { + const int spaceDim = 1; + const int polyOrder = 2; + const int meshWidth = 10; + testWeightedVectorTransformation(polyOrder, meshWidth, out, success); + } + + TEUCHOS_UNIT_TEST( TransformedBasisValues, TransformedWeightedVector_2D_p1 ) + { + const int spaceDim = 2; + const int polyOrder = 1; + const int meshWidth = 3; + testWeightedVectorTransformation(polyOrder, meshWidth, out, success); + } + + TEUCHOS_UNIT_TEST( TransformedBasisValues, TransformedWeightedVector_2D_p2 ) + { + const int spaceDim = 2; + const int polyOrder = 2; + const int meshWidth = 3; + testWeightedVectorTransformation(polyOrder, meshWidth, out, success); + } } // anonymous namespace diff --git a/packages/intrepid2/unit-test/performance/StructuredIntegration/StructuredIntegrationPerformance.cpp b/packages/intrepid2/unit-test/performance/StructuredIntegration/StructuredIntegrationPerformance.cpp index ab83eda5d694..1d708f698aca 100644 
--- a/packages/intrepid2/unit-test/performance/StructuredIntegration/StructuredIntegrationPerformance.cpp +++ b/packages/intrepid2/unit-test/performance/StructuredIntegration/StructuredIntegrationPerformance.cpp @@ -36,6 +36,8 @@ #include "HCURLStructuredAssembly.hpp" #include "HVOLStandardAssembly.hpp" #include "HVOLStructuredAssembly.hpp" +#include "VectorWeightedGRADGRADStandardAssembly.hpp" +#include "VectorWeightedGRADGRADStructuredAssembly.hpp" enum FormulationChoice { @@ -44,6 +46,7 @@ enum FormulationChoice Hdiv, // (div, div) + (value, value) Hcurl, // (curl, curl) + (value, value) L2, // (value, value) + VectorWeightedPoisson, UnknownFormulation }; @@ -81,11 +84,12 @@ std::string to_string(AlgorithmChoice choice) std::string to_string(FormulationChoice choice) { switch (choice) { - case Poisson: return "Poisson"; - case Hgrad: return "Hgrad"; - case Hdiv: return "Hdiv"; - case Hcurl: return "Hcurl"; - case L2: return "L2"; + case Poisson: return "Poisson"; + case Hgrad: return "Hgrad"; + case Hdiv: return "Hdiv"; + case Hcurl: return "Hcurl"; + case L2: return "L2"; + case VectorWeightedPoisson: return "VectorWeightedPoisson"; default: return "Unknown FormulationChoice"; } @@ -230,10 +234,12 @@ getMeshWidths(int basisCardinality, int maxStiffnessEntryCount, int maxElements) return meshWidths; } -template +template Intrepid2::ScalarView performStandardQuadrature(FormulationChoice formulation, - Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, - double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) + Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount, + Teuchos::RCP> vectorWeight1 = Teuchos::null, + Teuchos::RCP> vectorWeight2 = Teuchos::null) { switch (formulation) { @@ -247,15 +253,19 @@ Intrepid2::ScalarView performStandardQuadrature(FormulationCh return performStandardQuadratureHCURL(geometry, 
polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); case L2: return performStandardQuadratureHVOL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + case VectorWeightedPoisson: + return performStandardQuadratureVectorWeightedGRADGRAD(geometry, polyOrder, worksetSize, vectorWeight1, vectorWeight2, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); default: return Intrepid2::ScalarView(); } } -template +template Intrepid2::ScalarView performStructuredQuadrature(FormulationChoice formulation, - Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, - double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount) + Intrepid2::CellGeometry &geometry, const int &polyOrder, const int &worksetSize, + double &transformIntegrateFlopCount, double &jacobianCellMeasureFlopCount, + Teuchos::RCP> vectorWeight1 = Teuchos::null, + Teuchos::RCP> vectorWeight2 = Teuchos::null) { switch (formulation) { @@ -269,6 +279,8 @@ Intrepid2::ScalarView performStructuredQuadrature(Formulation return performStructuredQuadratureHCURL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); case L2: return performStructuredQuadratureHVOL(geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + case VectorWeightedPoisson: + return performStructuredQuadratureVectorWeightedGRADGRAD(geometry, polyOrder, worksetSize, vectorWeight1, vectorWeight2, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); default: return Intrepid2::ScalarView(); } @@ -280,12 +292,13 @@ typename BasisFamily::BasisPtr getBasisForFormulation(FormulationChoice formulat Intrepid2::EFunctionSpace fs; switch (formulation) { - case Poisson: fs = FUNCTION_SPACE_HGRAD; break; - case Hgrad: fs = FUNCTION_SPACE_HGRAD; break; - case Hdiv: fs = FUNCTION_SPACE_HDIV; break; - case Hcurl: fs = FUNCTION_SPACE_HCURL; break; - case L2: fs = 
FUNCTION_SPACE_HVOL; break; - case UnknownFormulation: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unknown formulation"); + case Poisson: fs = FUNCTION_SPACE_HGRAD; break; + case Hgrad: fs = FUNCTION_SPACE_HGRAD; break; + case Hdiv: fs = FUNCTION_SPACE_HDIV; break; + case Hcurl: fs = FUNCTION_SPACE_HCURL; break; + case L2: fs = FUNCTION_SPACE_HVOL; break; + case VectorWeightedPoisson: fs = FUNCTION_SPACE_HGRAD; break; + case UnknownFormulation: INTREPID2_TEST_FOR_EXCEPTION(true, std::invalid_argument, "Unknown formulation"); } auto basis = getBasis< BasisFamily >(cellTopo, fs, polyOrder); @@ -350,7 +363,7 @@ map,map > getWorksetSizeM map,map > worksetSizeMap; // keys are maps p -> worksetSize vector allAlgorithmChoices {Standard, NonAffineTensor, AffineTensor, Uniform}; - vector allFormulationChoices {Poisson, Hgrad, Hdiv, Hcurl, L2}; + vector allFormulationChoices {Poisson, Hgrad, Hdiv, Hcurl, L2, VectorWeightedPoisson}; // skip calibration case; want that to span workset sizes in a particular way… vector allModes {Test,BestSerial,BestOpenMP_16,BestCuda,Precalibrated}; @@ -590,6 +603,48 @@ map,map > getWorksetSizeM worksetSizeMap[affineTensorKey][7] = 1; worksetSizeMap[affineTensorKey][8] = 1; } + { + // VectorWeightedPoisson + // These calibrations were run 5-25-24 on an M2 Ultra, on a fork expected to be merged into Trilinos develop soon. 
+ FormulationChoice formulation = VectorWeightedPoisson; + tuple standardKey {mode,formulation,Standard}; + tuple nonAffineTensorKey {mode,formulation,NonAffineTensor}; + tuple affineTensorKey {mode,formulation,AffineTensor}; + + // best for VectorWeightedPoisson - these are for meshes that range from 32,768 for p=1 to 128 for p=10 + worksetSizeMap[standardKey][1] = 4096; + worksetSizeMap[standardKey][2] = 1024; + worksetSizeMap[standardKey][3] = 32; + worksetSizeMap[standardKey][4] = 4; + worksetSizeMap[standardKey][5] = 1; + worksetSizeMap[standardKey][6] = 1; + worksetSizeMap[standardKey][7] = 1; + worksetSizeMap[standardKey][8] = 1; + worksetSizeMap[standardKey][9] = 1; + worksetSizeMap[standardKey][10] = 1; + + worksetSizeMap[nonAffineTensorKey][1] = 2048; + worksetSizeMap[nonAffineTensorKey][2] = 2048; + worksetSizeMap[nonAffineTensorKey][3] = 128; + worksetSizeMap[nonAffineTensorKey][4] = 16; + worksetSizeMap[nonAffineTensorKey][5] = 2; + worksetSizeMap[nonAffineTensorKey][6] = 1; + worksetSizeMap[nonAffineTensorKey][7] = 1; + worksetSizeMap[nonAffineTensorKey][8] = 1; + worksetSizeMap[nonAffineTensorKey][9] = 1; + worksetSizeMap[nonAffineTensorKey][10] = 1; + + worksetSizeMap[affineTensorKey][1] = 32768; + worksetSizeMap[affineTensorKey][2] = 8192; + worksetSizeMap[affineTensorKey][3] = 128; + worksetSizeMap[affineTensorKey][4] = 8; + worksetSizeMap[affineTensorKey][5] = 2; + worksetSizeMap[affineTensorKey][6] = 1; + worksetSizeMap[affineTensorKey][7] = 1; + worksetSizeMap[affineTensorKey][8] = 1; + worksetSizeMap[affineTensorKey][9] = 1; + worksetSizeMap[affineTensorKey][10] = 1; + } } // BestSerial case break; case BestOpenMP_16: @@ -774,6 +829,48 @@ map,map > getWorksetSizeM worksetSizeMap[affineTensorKey][7] = 16; worksetSizeMap[affineTensorKey][8] = 16; } + { + // VectorWeightedPoisson + // These calibrations were run 5-25-24 on an M2 Ultra, on a fork expected to be merged into Trilinos develop soon. 
+ FormulationChoice formulation = VectorWeightedPoisson; + tuple standardKey {mode,formulation,Standard}; + tuple nonAffineTensorKey {mode,formulation,NonAffineTensor}; + tuple affineTensorKey {mode,formulation,AffineTensor}; + + // best for VectorWeightedPoisson - these are for meshes that range from 32,768 for p=1 to 128 for p=10 + worksetSizeMap[standardKey][1] = 16384; + worksetSizeMap[standardKey][2] = 16384; + worksetSizeMap[standardKey][3] = 8192; + worksetSizeMap[standardKey][4] = 1024; + worksetSizeMap[standardKey][5] = 1024; + worksetSizeMap[standardKey][6] = 1024; + worksetSizeMap[standardKey][7] = 512; + worksetSizeMap[standardKey][8] = 256; + worksetSizeMap[standardKey][9] = 128; + worksetSizeMap[standardKey][10] = 32; + + worksetSizeMap[nonAffineTensorKey][1] = 32768; + worksetSizeMap[nonAffineTensorKey][2] = 8192; + worksetSizeMap[nonAffineTensorKey][3] = 8192; + worksetSizeMap[nonAffineTensorKey][4] = 4096; + worksetSizeMap[nonAffineTensorKey][5] = 4096; + worksetSizeMap[nonAffineTensorKey][6] = 64; + worksetSizeMap[nonAffineTensorKey][7] = 32; + worksetSizeMap[nonAffineTensorKey][8] = 32; + worksetSizeMap[nonAffineTensorKey][9] = 16; + worksetSizeMap[nonAffineTensorKey][10] = 16; + + worksetSizeMap[affineTensorKey][1] = 32768; + worksetSizeMap[affineTensorKey][2] = 16384; + worksetSizeMap[affineTensorKey][3] = 8192; + worksetSizeMap[affineTensorKey][4] = 4096; + worksetSizeMap[affineTensorKey][5] = 4096; + worksetSizeMap[affineTensorKey][6] = 2048; + worksetSizeMap[affineTensorKey][7] = 32; + worksetSizeMap[affineTensorKey][8] = 16; + worksetSizeMap[affineTensorKey][9] = 16; + worksetSizeMap[affineTensorKey][10] = 16; + } } // BestOpenMP_16 case break; case BestCuda: @@ -953,6 +1050,23 @@ map,map > getWorksetSizeM worksetSizeMap[affineTensorKey][7] = 256; worksetSizeMap[affineTensorKey][8] = 128; } // L^2 formulation + { + // VectorWeightedPoisson + // TODO: set this with some actual calibration result values. 
For now, we just borrow from Poisson + + FormulationChoice formulation = VectorWeightedPoisson; + tuple standardKey {mode,formulation,Standard}; + tuple nonAffineTensorKey {mode,formulation,NonAffineTensor}; + tuple affineTensorKey {mode,formulation,AffineTensor}; + + tuple standardKey_Poisson {mode,Poisson,Standard}; + tuple nonAffineTensorKey_Poisson {mode,Poisson,NonAffineTensor}; + tuple affineTensorKey_Poisson {mode,Poisson,AffineTensor}; + + worksetSizeMap[standardKey] = worksetSizeMap[standardKey_Poisson]; + worksetSizeMap[nonAffineTensorKey] = worksetSizeMap[nonAffineTensorKey_Poisson]; + worksetSizeMap[affineTensorKey] = worksetSizeMap[affineTensorKey_Poisson]; + } } // BestCuda case break; case Precalibrated: @@ -1128,6 +1242,7 @@ int main( int argc, char* argv[] ) return -1; } + Teuchos::RCP> vectorWeight1, vectorWeight2; // used for VectorWeightedPoisson vector formulationChoices; if (formulationChoiceString == "All") { @@ -1153,6 +1268,17 @@ int main( int argc, char* argv[] ) { formulationChoices = vector{L2}; } + else if (formulationChoiceString == "VectorWeightedPoisson") + { + formulationChoices = vector{VectorWeightedPoisson}; + vectorWeight1 = Teuchos::rcp( new Kokkos::Array() ); + vectorWeight2 = Teuchos::rcp( new Kokkos::Array() ); + for (int d=0; d > assembledMatrices; for (auto algorithmChoice : algorithmChoices) { - int worksetSize = worksetSizeMap[algorithmChoice]; + int worksetSize = 1; + if (worksetSizeMap.find(algorithmChoice) != worksetSizeMap.end()) + worksetSize = worksetSizeMap[algorithmChoice]; if (mode == Calibration) { // if this workset size is bigger than the optimal for p-1, skip it -- it's highly @@ -1428,13 +1556,13 @@ int main( int argc, char* argv[] ) case Nodal: { using BasisFamily = DerivedNodalBasisFamily; - assembledMatrix = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStandardQuadrature(formulation, 
geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Hierarchical: { using BasisFamily = HierarchicalBasisFamily; - assembledMatrix = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStandardQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Serendipity: @@ -1456,13 +1584,13 @@ int main( int argc, char* argv[] ) case Nodal: { using BasisFamily = DerivedNodalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Hierarchical: { using BasisFamily = HierarchicalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Serendipity: @@ -1485,13 +1613,13 @@ int main( int argc, char* argv[] ) case Nodal: { using BasisFamily = DerivedNodalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Hierarchical: { using BasisFamily = HierarchicalBasisFamily; - 
assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, worksetSize, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Serendipity: @@ -1520,13 +1648,13 @@ int main( int argc, char* argv[] ) case Nodal: { using BasisFamily = DerivedNodalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, numCells, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, numCells, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Hierarchical: { using BasisFamily = HierarchicalBasisFamily; - assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, numCells, transformIntegrateFlopCount, jacobianCellMeasureFlopCount); + assembledMatrix = performStructuredQuadrature(formulation, geometry, polyOrder, numCells, transformIntegrateFlopCount, jacobianCellMeasureFlopCount, vectorWeight1, vectorWeight2); } break; case Serendipity: From 44ef23d84827a8cd2eb4007718109bc7bc7ff217 Mon Sep 17 00:00:00 2001 From: Brian Kelley Date: Tue, 27 Aug 2024 12:01:49 -0600 Subject: [PATCH 13/23] KokkosKernels: improve CRS sorting performance The changes exactly match github.com/kokkos/kokkos-kernels/pull/2293. Improves performance of sort_crs_matrix by up to 6.3x. 
--- .../common/src/KokkosKernels_SimpleUtils.hpp | 20 +- .../common/src/KokkosKernels_Utils.hpp | 6 + .../perf_test/sparse/CMakeLists.txt | 9 + .../sparse/KokkosSparse_sort_crs.cpp | 103 +++ .../impl/KokkosSparse_sort_crs_impl.hpp | 366 +++++++++ .../sparse/src/KokkosSparse_SortCrs.hpp | 727 ++++++------------ .../sparse/src/KokkosSparse_Utils.hpp | 13 + 7 files changed, 767 insertions(+), 477 deletions(-) create mode 100644 packages/kokkos-kernels/perf_test/sparse/KokkosSparse_sort_crs.cpp create mode 100644 packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp index 0ae29a2f50e0..51ff697bde1e 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_SimpleUtils.hpp @@ -358,13 +358,19 @@ struct ReduceMaxFunctor { }; template -void kk_view_reduce_max(size_t num_elements, view_type view_to_reduce, +void kk_view_reduce_max(const MyExecSpace &exec, size_t num_elements, view_type view_to_reduce, typename view_type::non_const_value_type &max_reduction) { - typedef Kokkos::RangePolicy my_exec_space; - Kokkos::parallel_reduce("KokkosKernels::Common::ReduceMax", my_exec_space(0, num_elements), + typedef Kokkos::RangePolicy policy_t; + Kokkos::parallel_reduce("KokkosKernels::Common::ReduceMax", policy_t(exec, 0, num_elements), ReduceMaxFunctor(view_to_reduce), max_reduction); } +template +void kk_view_reduce_max(size_t num_elements, view_type view_to_reduce, + typename view_type::non_const_value_type &max_reduction) { + kk_view_reduce_max(MyExecSpace(), num_elements, view_to_reduce, max_reduction); +} + // xorshift hash/pseudorandom function (supported for 32- and 64-bit integer // types only) template @@ -429,10 +435,14 @@ struct SequentialFillFunctor { val_type start; }; +template +void sequential_fill(const ExecSpace &exec, const V &v, 
typename V::non_const_value_type start = 0) { + Kokkos::parallel_for(Kokkos::RangePolicy(exec, 0, v.extent(0)), SequentialFillFunctor(v, start)); +} + template void sequential_fill(const V &v, typename V::non_const_value_type start = 0) { - Kokkos::parallel_for(Kokkos::RangePolicy(0, v.extent(0)), - SequentialFillFunctor(v, start)); + sequential_fill(typename V::execution_space(), v, start); } } // namespace Impl diff --git a/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp b/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp index a087002d3142..f0add80c50ed 100644 --- a/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp +++ b/packages/kokkos-kernels/common/src/KokkosKernels_Utils.hpp @@ -1076,6 +1076,12 @@ void view_reduce_max(size_t num_elements, view_type view_to_reduce, kk_view_reduce_max(num_elements, view_to_reduce, max_reduction); } +template +void view_reduce_max(const MyExecSpace &exec, size_t num_elements, view_type view_to_reduce, + typename view_type::non_const_value_type &max_reduction) { + kk_view_reduce_max(exec, num_elements, view_to_reduce, max_reduction); +} + template struct ReduceRowSizeFunctor { const size_type *rowmap_view_begins; diff --git a/packages/kokkos-kernels/perf_test/sparse/CMakeLists.txt b/packages/kokkos-kernels/perf_test/sparse/CMakeLists.txt index ef0bf7d99530..514ef0ed8253 100644 --- a/packages/kokkos-kernels/perf_test/sparse/CMakeLists.txt +++ b/packages/kokkos-kernels/perf_test/sparse/CMakeLists.txt @@ -116,6 +116,15 @@ KOKKOSKERNELS_ADD_EXECUTABLE( SOURCES KokkosSparse_mdf.cpp ) +# For the sake of build times, don't build this CRS sorting perf test by default. +# It can be enabled if needed by setting -DKokkosKernels_ENABLE_SORT_CRS_PERFTEST=ON. 
+if (KokkosKernels_ENABLE_SORT_CRS_PERFTEST) + KOKKOSKERNELS_ADD_EXECUTABLE( + sparse_sort_crs + SOURCES KokkosSparse_sort_crs.cpp +) +endif () + if (KokkosKernels_ENABLE_BENCHMARK) KOKKOSKERNELS_ADD_BENCHMARK( sparse_par_ilut diff --git a/packages/kokkos-kernels/perf_test/sparse/KokkosSparse_sort_crs.cpp b/packages/kokkos-kernels/perf_test/sparse/KokkosSparse_sort_crs.cpp new file mode 100644 index 000000000000..cd3ed91521d5 --- /dev/null +++ b/packages/kokkos-kernels/perf_test/sparse/KokkosSparse_sort_crs.cpp @@ -0,0 +1,103 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#include +#include +#include "KokkosKernels_config.h" +#include "KokkosSparse_IOUtils.hpp" +#include "KokkosKernels_perf_test_utilities.hpp" + +#include "KokkosSparse_CrsMatrix.hpp" +#include "KokkosSparse_SortCrs.hpp" + +using perf_test::CommonInputParams; + +struct LocalParams { + std::string mtxFile; +}; + +void print_options() { + std::cerr << "Options\n" << std::endl; + + std::cerr << perf_test::list_common_options(); + + std::cerr << "\t[Required] --mtx :: matrix to sort\n"; + std::cerr << "\t[Optional] --repeat :: how many times to repeat sorting\n"; +} + +int parse_inputs(LocalParams& params, int argc, char** argv) { + for (int i = 1; i < argc; ++i) { + if (perf_test::check_arg_str(i, argc, argv, "--mtx", params.mtxFile)) { + ++i; + } else { + std::cerr << "Unrecognized command line argument #" << i << ": " << argv[i] << std::endl; + print_options(); + return 1; + } + } + return 0; +} + +template 
+void run_experiment(int argc, char** argv, const CommonInputParams& common_params) { + using namespace KokkosSparse; + + using mem_space = typename exec_space::memory_space; + using device_t = typename Kokkos::Device; + using size_type = default_size_type; + using lno_t = default_lno_t; + using scalar_t = default_scalar; + using crsMat_t = KokkosSparse::CrsMatrix; + + using graph_t = typename crsMat_t::StaticCrsGraphType; + + LocalParams params; + if (parse_inputs(params, argc, argv)) return; + + crsMat_t A = KokkosSparse::Impl::read_kokkos_crst_matrix(params.mtxFile.c_str()); + std::cout << "Loaded matrix: " << A.numRows() << "x" << A.numCols() << " with " << A.nnz() << " entries.\n"; + // This first sort call serves as a warm-up + KokkosSparse::sort_crs_matrix(A); + lno_t m = A.numRows(); + lno_t n = A.numCols(); + auto rowmapHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.row_map); + auto entriesHost = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), A.graph.entries); + typename crsMat_t::index_type shuffledEntries("shuffled entries", A.nnz()); + // Randomly shuffle the entries within each row, so that the rows aren't + // already sorted. Leave the values alone; this changes the matrix numerically + // but this doesn't affect sorting. 
+ for (lno_t i = 0; i < m; i++) { + std::random_shuffle(entriesHost.data() + i, entriesHost.data() + i + 1); + } + Kokkos::deep_copy(shuffledEntries, entriesHost); + exec_space exec; + Kokkos::Timer timer; + double totalTime = 0; + for (int rep = 0; rep < common_params.repeat; rep++) { + Kokkos::deep_copy(exec, A.graph.entries, shuffledEntries); + exec.fence(); + timer.reset(); + KokkosSparse::sort_crs_matrix(exec, A); + exec.fence(); + totalTime += timer.seconds(); + } + std::cout << "Mean sort_crs_matrix time over " << common_params.repeat << " trials: "; + std::cout << totalTime / common_params.repeat << "\n"; +} + +#define KOKKOSKERNELS_PERF_TEST_NAME run_experiment +#include "KokkosKernels_perf_test_instantiation.hpp" +int main(int argc, char** argv) { return main_instantiation(argc, argv); } // main diff --git a/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp b/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp new file mode 100644 index 000000000000..5e18c3fd5ca2 --- /dev/null +++ b/packages/kokkos-kernels/sparse/impl/KokkosSparse_sort_crs_impl.hpp @@ -0,0 +1,366 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER +#ifndef _KOKKOSSPARSE_SORTCRS_IMPL_HPP +#define _KOKKOSSPARSE_SORTCRS_IMPL_HPP + +#include "Kokkos_Core.hpp" +#include "Kokkos_Sort.hpp" +#include "KokkosKernels_Sorting.hpp" + +// Workaround for issue with Kokkos::Experimental::sort_by_key, with nvcc and OpenMP enabled +// (Kokkos issue #7036, fixed in 4.4 release) +// Once support for Kokkos < 4.4 is dropped, +// all code inside "ifdef KK_DISABLE_BULK_SORT_BY_KEY" can be deleted. +#if (KOKKOS_VERSION < 40400) && defined(KOKKOS_ENABLE_CUDA) +#define KK_DISABLE_BULK_SORT_BY_KEY +#endif + +namespace KokkosSparse { +namespace Impl { + +template +struct MatrixRadixSortFunctor { + using Offset = typename rowmap_t::non_const_value_type; + using Ordinal = typename entries_t::non_const_value_type; + using UnsignedOrdinal = typename std::make_unsigned::type; + using Scalar = typename values_t::non_const_value_type; + // The functor owns memory for entriesAux, so it can't have + // MemoryTraits + using entries_managed_t = Kokkos::View; + using values_managed_t = Kokkos::View; + + MatrixRadixSortFunctor(const rowmap_t& rowmap_, const entries_t& entries_, const values_t& values_) + : rowmap(rowmap_), entries(entries_), values(values_) { + entriesAux = entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); + valuesAux = values_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), values.extent(0)); + } + + KOKKOS_INLINE_FUNCTION void operator()(Ordinal i) const { + Offset rowStart = rowmap(i); + Offset rowEnd = rowmap(i + 1); + Ordinal rowNum = rowEnd - rowStart; + // Radix sort requires unsigned keys for comparison + KokkosKernels::SerialRadixSort2( + (UnsignedOrdinal*)entries.data() + rowStart, (UnsignedOrdinal*)entriesAux.data() + rowStart, + values.data() + rowStart, valuesAux.data() + rowStart, rowNum); + } + + rowmap_t rowmap; + entries_t entries; + entries_managed_t entriesAux; + 
values_t values; + values_managed_t valuesAux; +}; + +template +struct MatrixThreadSortFunctor { + using Offset = typename rowmap_t::non_const_value_type; + + MatrixThreadSortFunctor(Ordinal numRows_, const rowmap_t& rowmap_, const entries_t& entries_, const values_t& values_) + : numRows(numRows_), rowmap(rowmap_), entries(entries_), values(values_) {} + + KOKKOS_INLINE_FUNCTION void operator()(const typename Policy::member_type& t) const { + Ordinal i = t.league_rank() * t.team_size() + t.team_rank(); + if (i >= numRows) return; + Offset rowStart = rowmap(i); + Offset rowEnd = rowmap(i + 1); + auto rowEntries = Kokkos::subview(entries, Kokkos::make_pair(rowStart, rowEnd)); + auto rowValues = Kokkos::subview(values, Kokkos::make_pair(rowStart, rowEnd)); + Kokkos::Experimental::sort_by_key_thread(t, rowEntries, rowValues); + } + + Ordinal numRows; + rowmap_t rowmap; + entries_t entries; + values_t values; +}; + +template +struct GraphRadixSortFunctor { + using Offset = typename rowmap_t::non_const_value_type; + using Ordinal = typename entries_t::non_const_value_type; + using UnsignedOrdinal = typename std::make_unsigned::type; + // The functor owns memory for entriesAux, so it can't have + // MemoryTraits + using entries_managed_t = Kokkos::View; + + GraphRadixSortFunctor(const rowmap_t& rowmap_, const entries_t& entries_) : rowmap(rowmap_), entries(entries_) { + entriesAux = entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); + } + + KOKKOS_INLINE_FUNCTION void operator()(Ordinal i) const { + Offset rowStart = rowmap(i); + Offset rowEnd = rowmap(i + 1); + Ordinal rowNum = rowEnd - rowStart; + // Radix sort requires unsigned keys for comparison + KokkosKernels::SerialRadixSort((UnsignedOrdinal*)entries.data() + rowStart, + (UnsignedOrdinal*)entriesAux.data() + rowStart, rowNum); + } + + rowmap_t rowmap; + entries_t entries; + entries_managed_t entriesAux; +}; + +template +struct GraphThreadSortFunctor { + using 
Offset = typename rowmap_t::non_const_value_type; + + GraphThreadSortFunctor(Ordinal numRows_, const rowmap_t& rowmap_, const entries_t& entries_) + : numRows(numRows_), rowmap(rowmap_), entries(entries_) {} + + KOKKOS_INLINE_FUNCTION void operator()(const typename Policy::member_type& t) const { + Ordinal i = t.league_rank() * t.team_size() + t.team_rank(); + if (i >= numRows) return; + Offset rowStart = rowmap(i); + Offset rowEnd = rowmap(i + 1); + auto rowEntries = Kokkos::subview(entries, Kokkos::make_pair(rowStart, rowEnd)); + Kokkos::Experimental::sort_thread(t, rowEntries); + } + + Ordinal numRows; + rowmap_t rowmap; + entries_t entries; +}; + +template +struct MergedRowmapFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using c_rowmap_t = typename rowmap_t::const_type; + + // Precondition: entries are sorted within each row + MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, const entries_t& entries_) + : mergedCounts(mergedCounts_), rowmap(rowmap_), entries(entries_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row, size_type& lnewNNZ) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with + mergedCounts(row) = 0; + return; + } + // Otherwise, the first entry in the row exists + lno_t uniqueEntries = 1; + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (entries(j - 1) != entries(j)) uniqueEntries++; + } + mergedCounts(row) = uniqueEntries; + lnewNNZ += uniqueEntries; + if (row == lno_t((rowmap.extent(0) - 1) - 1)) mergedCounts(row + 1) = 0; + } + + rowmap_t mergedCounts; + c_rowmap_t rowmap; + entries_t entries; +}; + +template +struct MatrixMergedEntriesFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + using scalar_t = typename values_t::non_const_value_type; + + // 
Precondition: entries are sorted within each row + MatrixMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, + const values_t& values_, const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_, + const values_t& mergedValues_) + : rowmap(rowmap_), + entries(entries_), + values(values_), + mergedRowmap(mergedRowmap_), + mergedEntries(mergedEntries_), + mergedValues(mergedValues_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { + size_type rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with, nothing to do + return; + } + // Otherwise, accumulate the value for each column + scalar_t accumVal = values(rowBegin); + lno_t accumCol = entries(rowBegin); + size_type insertPos = mergedRowmap(row); + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (accumCol == entries(j)) { + // accumulate + accumVal += values(j); + } else { + // write out and reset + mergedValues(insertPos) = accumVal; + mergedEntries(insertPos) = accumCol; + insertPos++; + accumVal = values(j); + accumCol = entries(j); + } + } + // always left with the last unique entry + mergedValues(insertPos) = accumVal; + mergedEntries(insertPos) = accumCol; + } + + typename rowmap_t::const_type rowmap; + entries_t entries; + values_t values; + rowmap_t mergedRowmap; + entries_t mergedEntries; + values_t mergedValues; +}; + +template +struct GraphMergedEntriesFunctor { + using size_type = typename rowmap_t::non_const_value_type; + using lno_t = typename entries_t::non_const_value_type; + + // Precondition: entries are sorted within each row + GraphMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, + const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_) + : rowmap(rowmap_), entries(entries_), mergedRowmap(mergedRowmap_), mergedEntries(mergedEntries_) {} + + KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { + size_type 
rowBegin = rowmap(row); + size_type rowEnd = rowmap(row + 1); + if (rowEnd == rowBegin) { + // Row was empty to begin with, nothing to do + return; + } + // Otherwise, accumulate the value for each column + lno_t accumCol = entries(rowBegin); + size_type insertPos = mergedRowmap(row); + for (size_type j = rowBegin + 1; j < rowEnd; j++) { + if (accumCol != entries(j)) { + // write out and reset + mergedEntries(insertPos) = accumCol; + insertPos++; + accumCol = entries(j); + } + } + // always left with the last unique entry + mergedEntries(insertPos) = accumCol; + } + + typename rowmap_t::const_type rowmap; + entries_t entries; + rowmap_t mergedRowmap; + entries_t mergedEntries; +}; + +template +struct MaxScanFunctor { + using value_type = uint64_t; + + MaxScanFunctor(uint64_t ncols_, const Keys& keys_, const Entries& entries_) + : ncols(ncols_), keys(keys_), entries(entries_) {} + + KOKKOS_INLINE_FUNCTION + void init(uint64_t& update) const { update = 0; } + + KOKKOS_INLINE_FUNCTION + void join(uint64_t& update, const uint64_t& input) const { update = Kokkos::max(update, input); } + + KOKKOS_INLINE_FUNCTION + void operator()(Offset i, uint64_t& lmax, bool finalPass) const { + lmax = Kokkos::max(lmax, keys(i)); + if (finalPass) { + // lmax is the row containing entry i. + // The key is equivalent to the entry's linear + // index if the matrix were dense and row-major. + keys(i) = lmax * ncols + entries(i); + } + } + + uint64_t ncols; + Keys keys; + Entries entries; +}; + +template +Kokkos::View generateBulkCrsKeys(const ExecSpace& exec, const Rowmap& rowmap, + const Entries& entries, + typename Entries::non_const_value_type ncols) { + using Offset = typename Rowmap::non_const_value_type; + using Ordinal = typename Entries::non_const_value_type; + Ordinal numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; + Kokkos::View keys("keys", entries.extent(0)); + Kokkos::parallel_for( + "CRS bulk sorting: mark row begins", Kokkos::RangePolicy(exec, 0, numRows), KOKKOS_LAMBDA(Ordinal i) { + Offset rowBegin = rowmap(i); + // Only mark the beginnings of non-empty rows. + // Otherwise multiple rows could try to update the same key. + if (rowmap(i + 1) != rowBegin) { + keys(rowBegin) = uint64_t(i); + } + }); + Kokkos::fence(); + Kokkos::parallel_scan("CRS bulk sorting: compute keys", Kokkos::RangePolicy(exec, 0, entries.extent(0)), + MaxScanFunctor(ncols, keys, entries)); + Kokkos::fence(); + return keys; +} + +#ifndef KK_DISABLE_BULK_SORT_BY_KEY +template +Kokkos::View computeEntryPermutation( + const ExecSpace& exec, const Rowmap& rowmap, const Entries& entries, typename Entries::non_const_value_type ncols) { + using Offset = typename Rowmap::non_const_value_type; + auto keys = generateBulkCrsKeys(exec, rowmap, entries, ncols); + Kokkos::View permutation(Kokkos::view_alloc(Kokkos::WithoutInitializing, "permutation"), + entries.extent(0)); + // This initializes permutation as the identity + KokkosKernels::Impl::sequential_fill(exec, permutation); + Kokkos::Experimental::sort_by_key(exec, keys, permutation); + return permutation; +} + +// Heuristic for choosing bulk sorting algorithm +template +bool useBulkSortHeuristic(Ordinal avgDeg, Ordinal maxDeg) { + // Use bulk sort if matrix is highly imbalanced, + // OR the longest rows have many entries. 
+ return (maxDeg / 10 > avgDeg) || (maxDeg > 1024); +} +#endif + +template +void applyPermutation(const ExecSpace& exec, const Permutation& permutation, const InView& in, const OutView& out) { + Kokkos::parallel_for( + "CRS bulk sorting: permute", Kokkos::RangePolicy(exec, 0, in.extent(0)), + KOKKOS_LAMBDA(size_t i) { out(i) = in(permutation(i)); }); +} + +template +void applyPermutationBlockValues(const ExecSpace& exec, const Permutation& permutation, const InView& in, + const OutView& out, Ordinal blockSize) { + uint64_t scalarsPerBlock = (uint64_t)blockSize * blockSize; + if (in.extent(0) % scalarsPerBlock) + throw std::invalid_argument( + "sort_bsr_matrix: matrix values extent not divisible by graph entries " + "extent"); + Kokkos::parallel_for( + "BSR bulk sorting: permute", Kokkos::RangePolicy(exec, 0, in.extent(0)), KOKKOS_LAMBDA(size_t i) { + uint64_t blockIndex = i / scalarsPerBlock; + uint64_t offsetInBlock = i % scalarsPerBlock; + out(i) = in(permutation(blockIndex) * scalarsPerBlock + offsetInBlock); + }); +} + +} // namespace Impl +} // namespace KokkosSparse + +#endif diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp index 455068b56f43..1203cd244b5b 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_SortCrs.hpp @@ -16,38 +16,11 @@ #ifndef _KOKKOSSPARSE_SORTCRS_HPP #define _KOKKOSSPARSE_SORTCRS_HPP -#include "Kokkos_Core.hpp" -#include "KokkosKernels_Sorting.hpp" +#include "KokkosSparse_sort_crs_impl.hpp" +#include "KokkosSparse_Utils.hpp" namespace KokkosSparse { -// ---------------------------------- -// BSR matrix/graph sorting utilities -// ---------------------------------- - -// Sort a BRS matrix: within each row, sort entries ascending by column and -// permute the values accordingly. 
-template -void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values); - -// Sort a BRS matrix on the given execution space instance: within each row, -// sort entries ascending by column and permute the values accordingly. -template -void sort_bsr_matrix(const execution_space& exec, const lno_t blockdim, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values); - -// Sort a BRS matrix: within each row, sort entries ascending by column and -// permute the values accordingly. -template -void sort_bsr_matrix(const bsrMat_t& A); - -// Sort a BRS matrix on the given execution space instance: within each row, -// sort entries ascending by column and permute the values accordingly. -template -void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, const bsrMat_t& A); - // ---------------------------------- // CRS matrix/graph sorting utilities // ---------------------------------- @@ -63,269 +36,13 @@ void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, const bsrMa // duplicated entries in A, A is sorted and returned (instead of a newly // allocated matrix). 
-namespace Impl { - -template -struct SortCrsMatrixFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using scalar_t = typename values_t::non_const_value_type; - using team_mem = typename Kokkos::TeamPolicy::member_type; - // The functor owns memory for entriesAux, so it can't have - // MemoryTraits - using entries_managed_t = Kokkos::View; - using values_managed_t = Kokkos::View; - - SortCrsMatrixFunctor(bool usingRangePol, const rowmap_t& rowmap_, const entries_t& entries_, const values_t& values_) - : rowmap(rowmap_), entries(entries_), values(values_) { - if (usingRangePol) { - entriesAux = entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); - valuesAux = values_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Values aux"), values.extent(0)); - } - // otherwise, aux arrays won't be allocated (sorting in place) - } - - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - // Radix sort requires unsigned keys for comparison - using unsigned_lno_t = typename std::make_unsigned::type; - KokkosKernels::SerialRadixSort2( - (unsigned_lno_t*)entries.data() + rowStart, (unsigned_lno_t*)entriesAux.data() + rowStart, - values.data() + rowStart, valuesAux.data() + rowStart, rowNum); - } - - KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { - size_type i = t.league_rank(); - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort2(entries.data() + rowStart, - values.data() + rowStart, rowNum, t); - } - - rowmap_t rowmap; - entries_t entries; - entries_managed_t entriesAux; - values_t values; - values_managed_t valuesAux; -}; - -template -struct SortCrsGraphFunctor { - using size_type = typename rowmap_t::non_const_value_type; - 
using lno_t = typename entries_t::non_const_value_type; - using team_mem = typename Kokkos::TeamPolicy::member_type; - // The functor owns memory for entriesAux, so it can't have - // MemoryTraits - using entries_managed_t = Kokkos::View; - - SortCrsGraphFunctor(bool usingRangePol, const rowmap_t& rowmap_, const entries_t& entries_) - : rowmap(rowmap_), entries(entries_) { - if (usingRangePol) { - entriesAux = entries_managed_t(Kokkos::view_alloc(Kokkos::WithoutInitializing, "Entries aux"), entries.extent(0)); - } - // otherwise, aux arrays won't be allocated (sorting in place) - } - - KOKKOS_INLINE_FUNCTION void operator()(const lno_t i) const { - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - // Radix sort requires unsigned keys for comparison - using unsigned_lno_t = typename std::make_unsigned::type; - KokkosKernels::SerialRadixSort((unsigned_lno_t*)entries.data() + rowStart, - (unsigned_lno_t*)entriesAux.data() + rowStart, rowNum); - } - - KOKKOS_INLINE_FUNCTION void operator()(const team_mem t) const { - size_type i = t.league_rank(); - size_type rowStart = rowmap(i); - size_type rowEnd = rowmap(i + 1); - lno_t rowNum = rowEnd - rowStart; - KokkosKernels::TeamBitonicSort(entries.data() + rowStart, rowNum, t); - } - - rowmap_t rowmap; - entries_t entries; - entries_managed_t entriesAux; -}; - -template -struct MergedRowmapFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using c_rowmap_t = typename rowmap_t::const_type; - - // Precondition: entries are sorted within each row - MergedRowmapFunctor(const rowmap_t& mergedCounts_, const c_rowmap_t& rowmap_, const entries_t& entries_) - : mergedCounts(mergedCounts_), rowmap(rowmap_), entries(entries_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row, size_type& lnewNNZ) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == 
rowBegin) { - // Row was empty to begin with - mergedCounts(row) = 0; - return; - } - // Otherwise, the first entry in the row exists - lno_t uniqueEntries = 1; - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (entries(j - 1) != entries(j)) uniqueEntries++; - } - mergedCounts(row) = uniqueEntries; - lnewNNZ += uniqueEntries; - if (row == lno_t((rowmap.extent(0) - 1) - 1)) mergedCounts(row + 1) = 0; - } - - rowmap_t mergedCounts; - c_rowmap_t rowmap; - entries_t entries; -}; - -template -struct MatrixMergedEntriesFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - using scalar_t = typename values_t::non_const_value_type; - - // Precondition: entries are sorted within each row - MatrixMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, - const values_t& values_, const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_, - const values_t& mergedValues_) - : rowmap(rowmap_), - entries(entries_), - values(values_), - mergedRowmap(mergedRowmap_), - mergedEntries(mergedEntries_), - mergedValues(mergedValues_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with, nothing to do - return; - } - // Otherwise, accumulate the value for each column - scalar_t accumVal = values(rowBegin); - lno_t accumCol = entries(rowBegin); - size_type insertPos = mergedRowmap(row); - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (accumCol == entries(j)) { - // accumulate - accumVal += values(j); - } else { - // write out and reset - mergedValues(insertPos) = accumVal; - mergedEntries(insertPos) = accumCol; - insertPos++; - accumVal = values(j); - accumCol = entries(j); - } - } - // always left with the last unique entry - mergedValues(insertPos) = accumVal; - mergedEntries(insertPos) = accumCol; - } - - 
typename rowmap_t::const_type rowmap; - entries_t entries; - values_t values; - rowmap_t mergedRowmap; - entries_t mergedEntries; - values_t mergedValues; -}; - -template -struct GraphMergedEntriesFunctor { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::non_const_value_type; - - // Precondition: entries are sorted within each row - GraphMergedEntriesFunctor(const typename rowmap_t::const_type& rowmap_, const entries_t& entries_, - const rowmap_t& mergedRowmap_, const entries_t& mergedEntries_) - : rowmap(rowmap_), entries(entries_), mergedRowmap(mergedRowmap_), mergedEntries(mergedEntries_) {} - - KOKKOS_INLINE_FUNCTION void operator()(lno_t row) const { - size_type rowBegin = rowmap(row); - size_type rowEnd = rowmap(row + 1); - if (rowEnd == rowBegin) { - // Row was empty to begin with, nothing to do - return; - } - // Otherwise, accumulate the value for each column - lno_t accumCol = entries(rowBegin); - size_type insertPos = mergedRowmap(row); - for (size_type j = rowBegin + 1; j < rowEnd; j++) { - if (accumCol != entries(j)) { - // write out and reset - mergedEntries(insertPos) = accumCol; - insertPos++; - accumCol = entries(j); - } - } - // always left with the last unique entry - mergedEntries(insertPos) = accumCol; - } - - typename rowmap_t::const_type rowmap; - entries_t entries; - rowmap_t mergedRowmap; - entries_t mergedEntries; -}; - -template -KOKKOS_INLINE_FUNCTION void kk_swap(T& a, T& b) { - T t = a; - a = b; - b = t; -} - -template -struct sort_bsr_functor { - using lno_t = typename entries_type::non_const_value_type; - - row_map_type rowmap; - entries_type entries; - values_type values; - const lno_t blocksize; - - sort_bsr_functor(row_map_type rowmap_, entries_type entries_, values_type values_, const lno_t blocksize_) - : rowmap(rowmap_), entries(entries_), values(values_), blocksize(blocksize_) {} - - KOKKOS_INLINE_FUNCTION - void operator()(const lno_t i) const { - const lno_t rowStart = 
rowmap(i); - const lno_t rowSize = rowmap(i + 1) - rowStart; - auto* e = entries.data() + rowStart; - auto* v = values.data() + rowStart * blocksize; - bool done = false; - while (!done) { - done = true; - for (lno_t j = 1; j < rowSize; ++j) { - const lno_t jp = j - 1; - if (e[jp] <= e[j]) continue; - Impl::kk_swap(e[jp], e[j]); - auto const vb = v + j * blocksize; - auto const vbp = v + jp * blocksize; - for (lno_t k = 0; k < blocksize; ++k) // std::swap_ranges(vb, vb + blocksize, vbp); - Impl::kk_swap(vb[k], vbp[k]); - done = false; - } - } - } -}; - -} // namespace Impl - // Sort a CRS matrix: within each row, sort entries ascending by column. // At the same time, permute the values. template void sort_crs_matrix(const execution_space& exec, const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { + const values_t& values, + typename entries_t::non_const_value_type numCols = + Kokkos::ArithTraits::max()) { static_assert(Kokkos::SpaceAccessibility::accessible, "sort_crs_matrix: rowmap_t is not accessible from the given execution " "space"); @@ -338,71 +55,156 @@ void sort_crs_matrix(const execution_space& exec, const rowmap_t& rowmap, const static_assert(!std::is_const_v, "sort_crs_matrix: entries_t must not be const-valued"); static_assert(!std::is_const_v, "sort_crs_matrix: value_t must not be const-valued"); - using lno_t = typename entries_t::non_const_value_type; - using team_pol = Kokkos::TeamPolicy; - bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - Impl::SortCrsMatrixFunctor funct(useRadix, rowmap, entries, values); - if (useRadix) { - Kokkos::parallel_for("sort_crs_matrix", Kokkos::RangePolicy(exec, 0, numRows), funct); + using Ordinal = typename entries_t::non_const_value_type; + // This early return condition covers having 0 or 1 entries, + // which is also implied by having 0 rows or 0 columns. 
+ // If only 1 entry, the matrix is already sorted. + if (entries.extent(0) <= size_t(1)) { + return; + } + Ordinal numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + if constexpr (!KokkosKernels::Impl::kk_is_gpu_exec_space()) { + // On CPUs, use a sequential radix sort within each row. + Kokkos::parallel_for("sort_crs_matrix[CPU,radix]", + Kokkos::RangePolicy>(exec, 0, numRows), + Impl::MatrixRadixSortFunctor(rowmap, entries, values)); } else { - // Try to get teamsize to be largest power of 2 not greater than avg entries - // per row - // TODO (probably important for performnce): add thread-level sort also, and - // use that for small avg degree. But this works for now. - lno_t idealTeamSize = 1; - lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; - while (idealTeamSize < avgDeg / 2) { - idealTeamSize *= 2; + // On GPUs: + // If the matrix is highly imbalanced, or has long rows AND the dimensions + // are not too large to do one large bulk sort, do that. Otherwise, sort + // using one Kokkos thread per row. 
+ Ordinal avgDeg = (entries.extent(0) + numRows - 1) / numRows; +#ifndef KK_DISABLE_BULK_SORT_BY_KEY + Ordinal maxDeg = KokkosSparse::Impl::graph_max_degree(exec, rowmap); + bool useBulkSort = false; + if (KokkosSparse::Impl::useBulkSortHeuristic(avgDeg, maxDeg)) { + // Calculate the true number of columns if user didn't pass it in + if (numCols == Kokkos::ArithTraits::max()) { + KokkosKernels::Impl::kk_view_reduce_max(exec, entries.extent(0), entries, numCols); + numCols++; + } + uint64_t maxBulkKey = (uint64_t)numRows * (uint64_t)numCols; + useBulkSort = maxBulkKey / numRows == (uint64_t)numCols; + } + if (useBulkSort) { + auto permutation = KokkosSparse::Impl::computeEntryPermutation(exec, rowmap, entries, numCols); + // Permutations cannot be done in-place + Kokkos::View origValues( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "origValues"), values.extent(0)); + Kokkos::View origEntries( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "origEntries"), entries.extent(0)); + Kokkos::deep_copy(exec, origValues, values); + Kokkos::deep_copy(exec, origEntries, entries); + KokkosSparse::Impl::applyPermutation(exec, permutation, origEntries, entries); + KokkosSparse::Impl::applyPermutation(exec, permutation, origValues, values); + } else +#else + (void)numCols; +#endif + { + using TeamPol = Kokkos::TeamPolicy; + // Can't use bulk sort approach as matrix dimensions are too large. + // Fall back to parallel thread-level sort within each row. 
+ Ordinal vectorLength = 1; + while (vectorLength < avgDeg / 2) { + vectorLength *= 2; + } + if (vectorLength > TeamPol ::vector_length_max()) vectorLength = TeamPol ::vector_length_max(); + Impl::MatrixThreadSortFunctor funct(numRows, rowmap, entries, + values); + Ordinal teamSize = TeamPol(exec, 1, 1, vectorLength).team_size_recommended(funct, Kokkos::ParallelForTag()); + Kokkos::parallel_for("sort_crs_matrix[GPU,bitonic]", + TeamPol(exec, (numRows + teamSize - 1) / teamSize, teamSize, vectorLength), funct); } - team_pol temp(exec, numRows, 1); - lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); - lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_matrix", team_pol(exec, numRows, teamSize), funct); } } template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { - sort_crs_matrix(execution_space(), rowmap, entries, values); +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values, + typename entries_t::const_value_type numCols = + Kokkos::ArithTraits::max()) { + sort_crs_matrix(execution_space(), rowmap, entries, values, numCols); } template -void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { - sort_crs_matrix(typename entries_t::execution_space(), rowmap, entries, values); +void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values, + typename entries_t::const_value_type numCols = + Kokkos::ArithTraits::max()) { + sort_crs_matrix(typename entries_t::execution_space(), rowmap, entries, values, numCols); } template void sort_crs_matrix(const typename crsMat_t::execution_space& exec, const crsMat_t& A) { - sort_crs_matrix(exec, A.graph.row_map, A.graph.entries, A.values); + sort_crs_matrix(exec, A.graph.row_map, A.graph.entries, A.values, A.numCols()); } template void sort_crs_matrix(const crsMat_t& A) { - sort_crs_matrix(typename 
crsMat_t::execution_space(), A.graph.row_map, A.graph.entries, A.values); + sort_crs_matrix(typename crsMat_t::execution_space(), A.graph.row_map, A.graph.entries, A.values, A.numCols()); } // Sort a BRS matrix: within each row, sort entries ascending by column and // permute the values accordingly. -template -void sort_bsr_matrix(const execution_space& exec, const lno_t blockdim, const rowmap_t& rowmap, - const entries_t& entries, const values_t& values) { - // TODO: this is O(N^2) mock for debugging - do regular implementation based - // on Radix/Bitonic sort (like CSR) IDEA: maybe we need only one general - // Radix2/Bitonic2 and CSR sorting may call it with blockSize=1 ? - lno_t numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - const lno_t blocksize = blockdim * blockdim; - - assert(values.extent(0) == entries.extent(0) * blocksize); - Impl::sort_bsr_functor bsr_sorter(rowmap, entries, values, blocksize); - Kokkos::parallel_for("sort_bsr_matrix", Kokkos::RangePolicy(exec, 0, numRows), bsr_sorter); -} - -template -void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { - sort_bsr_matrix(execution_space(), blockdim, rowmap, entries, values); +template +void sort_bsr_matrix(const execution_space& exec, Ordinal blockSize, const rowmap_t& rowmap, const entries_t& entries, + const values_t& values, + typename entries_t::non_const_value_type numCols = + Kokkos::ArithTraits::max()) { + static_assert(std::is_same_v, + "sort_bsr_matrix: Ordinal type must match nonconst value type of " + "entries_t (default template parameter)"); + if (entries.extent(0) <= size_t(1)) { + return; + } + Ordinal numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; + if (numCols == Kokkos::ArithTraits::max()) { + KokkosKernels::Impl::kk_view_reduce_max(exec, entries.extent(0), entries, numCols); + numCols++; + } + uint64_t maxBulkKey = (uint64_t)numRows * (uint64_t)numCols; + if (maxBulkKey / numRows != (uint64_t)numCols) + throw std::invalid_argument( + "sort_bsr_matrix: implementation requires that numRows * numCols is " + "representable in uint64_t"); +#ifdef KK_DISABLE_BULK_SORT_BY_KEY + using TeamPol = Kokkos::TeamPolicy; + using Offset = typename rowmap_t::non_const_value_type; + // Temporary workaround: do not use Kokkos::Experimental::sort_by_key, instead + // sort bulk keys one row at a time + auto keys = Impl::generateBulkCrsKeys(exec, rowmap, entries, numCols); + Kokkos::View permutation(Kokkos::view_alloc(Kokkos::WithoutInitializing, "permutation"), + entries.extent(0)); + KokkosKernels::Impl::sequential_fill(exec, permutation); + Ordinal vectorLength = 1; + Ordinal avgDeg = (entries.extent(0) + numRows - 1) / numRows; + while (vectorLength < avgDeg / 2) { + vectorLength *= 2; + } + if (vectorLength > TeamPol ::vector_length_max()) vectorLength = TeamPol ::vector_length_max(); + Impl::MatrixThreadSortFunctor funct( + numRows, rowmap, keys, permutation); + Ordinal teamSize = TeamPol(exec, 1, 1, vectorLength).team_size_recommended(funct, Kokkos::ParallelForTag()); + Kokkos::parallel_for("sort_bulk_keys_by_row[GPU,bitonic]", + TeamPol(exec, (numRows + teamSize - 1) / teamSize, teamSize, vectorLength), funct); +#else + auto permutation = KokkosSparse::Impl::computeEntryPermutation(exec, rowmap, entries, numCols); +#endif + // Permutations cannot be done in-place + Kokkos::View origValues( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "origValues"), values.extent(0)); + Kokkos::View origEntries( + Kokkos::view_alloc(Kokkos::WithoutInitializing, "origEntries"), entries.extent(0)); + Kokkos::deep_copy(exec, origValues, values); + Kokkos::deep_copy(exec, origEntries, entries); + 
KokkosSparse::Impl::applyPermutation(exec, permutation, origEntries, entries); + KokkosSparse::Impl::applyPermutationBlockValues(exec, permutation, origValues, values, blockSize); +} + +template +void sort_bsr_matrix(Ordinal blockdim, const rowmap_t& rowmap, const entries_t& entries, const values_t& values, + Ordinal numCols = Kokkos::ArithTraits::max()) { + sort_bsr_matrix(execution_space(), blockdim, rowmap, entries, values, numCols); } // Sort a BSR matrix (like CRS but single values are replaced with contignous @@ -413,7 +215,7 @@ void sort_bsr_matrix(const typename bsrMat_t::execution_space& exec, const bsrMa // directly sort_bsr_matrix( - exec, A.blockDim(), A.graph.row_map, A.graph.entries, A.values); + exec, A.blockDim(), A.graph.row_map, A.graph.entries, A.values, A.numCols()); } template @@ -423,9 +225,10 @@ void sort_bsr_matrix(const bsrMat_t& A) { // Sort a CRS graph: within each row, sort entries ascending by column. template -void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, const entries_t& entries) { - using lno_t = typename entries_t::non_const_value_type; - using team_pol = Kokkos::TeamPolicy; +void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, const entries_t& entries, + typename entries_t::non_const_value_type numCols = + Kokkos::ArithTraits::max()) { + using Ordinal = typename entries_t::non_const_value_type; static_assert(Kokkos::SpaceAccessibility::accessible, "sort_crs_graph: rowmap_t is not accessible from the given execution " "space"); @@ -433,27 +236,55 @@ void sort_crs_graph(const execution_space& exec, const rowmap_t& rowmap, const e "sort_crs_graph: entries_t is not accessible from the given execution " "space"); static_assert(!std::is_const_v, "sort_crs_graph: entries_t must not be const-valued"); - bool useRadix = !KokkosKernels::Impl::kk_is_gpu_exec_space(); - lno_t numRows = rowmap.extent(0) ? 
rowmap.extent(0) - 1 : 0; - if (numRows == 0) return; - Impl::SortCrsGraphFunctor funct(useRadix, rowmap, entries); - if (useRadix) { - Kokkos::parallel_for("sort_crs_graph", Kokkos::RangePolicy(exec, 0, numRows), funct); + Ordinal numRows = rowmap.extent(0) ? rowmap.extent(0) - 1 : 0; + if (entries.extent(0) <= size_t(1)) { + return; + } + if constexpr (!KokkosKernels::Impl::kk_is_gpu_exec_space()) { + // If on CPU, sort each row independently. Don't need to know numCols for + // this. + Kokkos::parallel_for("sort_crs_graph[CPU,radix]", + Kokkos::RangePolicy>(exec, 0, numRows), + Impl::GraphRadixSortFunctor(rowmap, entries)); } else { - // Try to get teamsize to be largest power of 2 less than or equal to - // half the entries per row. 0.5 * #entries is bitonic's parallelism within - // a row. - // TODO (probably important for performnce): add thread-level sort also, and - // use that for small avg degree. But this works for now. - lno_t idealTeamSize = 1; - lno_t avgDeg = (entries.extent(0) + numRows - 1) / numRows; - while (idealTeamSize < avgDeg / 2) { - idealTeamSize *= 2; + // On GPUs: + // If the graph is highly imbalanced AND the dimensions are not too large + // to do one large bulk sort, do that. Otherwise, sort using one Kokkos + // thread per row. 
+ Ordinal avgDeg = (entries.extent(0) + numRows - 1) / numRows; +#ifndef KK_DISABLE_BULK_SORT_BY_KEY + Ordinal maxDeg = KokkosSparse::Impl::graph_max_degree(exec, rowmap); + bool useBulkSort = false; + if (KokkosSparse::Impl::useBulkSortHeuristic(avgDeg, maxDeg)) { + // Calculate the true number of columns if user didn't pass it in + if (numCols == Kokkos::ArithTraits::max()) { + KokkosKernels::Impl::kk_view_reduce_max(exec, entries.extent(0), entries, numCols); + numCols++; + } + uint64_t maxBulkKey = (uint64_t)numRows * (uint64_t)numCols; + useBulkSort = maxBulkKey / numRows == (uint64_t)numCols; + } + if (useBulkSort) { + auto keys = KokkosSparse::Impl::generateBulkCrsKeys(exec, rowmap, entries, numCols); + Kokkos::Experimental::sort_by_key(exec, keys, entries); + } else +#else + (void)numCols; +#endif + { + using TeamPol = Kokkos::TeamPolicy; + // Fall back to thread-level sort within each row + Ordinal vectorLength = 1; + while (vectorLength < avgDeg / 2) { + vectorLength *= 2; + } + if (vectorLength > TeamPol ::vector_length_max()) vectorLength = TeamPol ::vector_length_max(); + + Impl::GraphThreadSortFunctor funct(numRows, rowmap, entries); + Ordinal teamSize = TeamPol(exec, 1, 1, vectorLength).team_size_recommended(funct, Kokkos::ParallelForTag()); + Kokkos::parallel_for("sort_crs_graph[GPU,bitonic]", + TeamPol(exec, (numRows + teamSize - 1) / teamSize, teamSize, vectorLength), funct); } - team_pol temp(exec, numRows, 1); - lno_t maxTeamSize = temp.team_size_max(funct, Kokkos::ParallelForTag()); - lno_t teamSize = std::min(idealTeamSize, maxTeamSize); - Kokkos::parallel_for("sort_crs_graph", team_pol(exec, numRows, teamSize), funct); } } @@ -462,36 +293,38 @@ void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { sort_crs_graph(execution_space(), rowmap, entries); } -// This overload covers 2 cases, while allowing all template args to be deduced: -// - sort_crs_graph(exec, G) -// - sort_crs_graph(rowmap, entries) -template -void 
sort_crs_graph(const Arg1& a1, const Arg2& a2) { - if constexpr (Kokkos::is_execution_space_v) { - // a1 is an exec instance, a2 is a graph - sort_crs_graph(a1, a2.row_map, a2.entries); - } else if constexpr (Kokkos::is_view_v) { - // a1 is rowmap, a2 is entries - sort_crs_graph(typename Arg2::execution_space(), a1, a2); - } else { - static_assert(Arg1::doesnthavethisthing, - "sort_crs_graph(arg1, arg2): expect either (exec, G) or " - "(rowmap, entries)"); - } +template +typename std::enable_if_t> sort_crs_graph( + const rowmap_t& rowmap, const entries_t& entries, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + sort_crs_graph(typename entries_t::execution_space(), rowmap, entries, numCols); +} + +template +typename std::enable_if_t> sort_crs_graph( + const execution_space& exec, const crsGraph_t& G, + typename crsGraph_t::entries_type::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + sort_crs_graph(exec, G.row_map, G.entries, numCols); } template -void sort_crs_graph(const crsGraph_t& G) { - sort_crs_graph(typename crsGraph_t::execution_space(), G); +void sort_crs_graph(const crsGraph_t& G, + typename crsGraph_t::entries_type::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + sort_crs_graph(typename crsGraph_t::execution_space(), G, numCols); } template void sort_and_merge_matrix(const exec_space& exec, const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, const values_t& values_in, rowmap_t& rowmap_out, - entries_t& entries_out, values_t& values_out) { + entries_t& entries_out, values_t& values_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { using nc_rowmap_t = typename rowmap_t::non_const_type; - using size_type = typename nc_rowmap_t::value_type; - using ordinal_t = typename entries_t::value_type; + using Offset = typename nc_rowmap_t::value_type; + using Ordinal = typename entries_t::value_type; using range_t = Kokkos::RangePolicy; 
static_assert(Kokkos::SpaceAccessibility::accessible, "sort_and_merge_matrix: rowmap_t is not accessible from the given " @@ -507,8 +340,8 @@ void sort_and_merge_matrix(const exec_space& exec, const typename rowmap_t::cons static_assert(!std::is_const_v, "sort_and_merge_matrix: value_t must not be const-valued"); - ordinal_t numRows = rowmap_in.extent(0) ? ordinal_t(rowmap_in.extent(0) - 1) : ordinal_t(0); - size_type nnz = entries_in.extent(0); + Ordinal numRows = rowmap_in.extent(0) ? Ordinal(rowmap_in.extent(0) - 1) : Ordinal(0); + Offset nnz = entries_in.extent(0); if (numRows == 0) { rowmap_out = typename rowmap_t::non_const_type("SortedMerged rowmap", rowmap_in.extent(0)); @@ -517,13 +350,13 @@ void sort_and_merge_matrix(const exec_space& exec, const typename rowmap_t::cons return; } - sort_crs_matrix(exec, rowmap_in, entries_in, values_in); + sort_crs_matrix(exec, rowmap_in, entries_in, values_in, numCols); // Count entries per row into a new rowmap, in terms of merges that can be // done nc_rowmap_t nc_rowmap_out(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged rowmap"), numRows + 1); - size_type numCompressedEntries = 0; - Kokkos::parallel_reduce(range_t(exec, 0, numRows), + Offset numCompressedEntries = 0; + Kokkos::parallel_reduce("KokkosSparse::Impl::MergedRowmapFunctor", range_t(exec, 0, numRows), Impl::MergedRowmapFunctor(nc_rowmap_out, rowmap_in, entries_in), numCompressedEntries); if (nnz == numCompressedEntries) { @@ -555,7 +388,7 @@ void sort_and_merge_matrix(const exec_space& exec, const typename rowmap_t::cons values_out = values_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged values"), numCompressedEntries); // Compute merged entries and values - Kokkos::parallel_for(range_t(exec, 0, numRows), + Kokkos::parallel_for("KokkosSparse::Impl::MatrixMergedEntriesFunctor", range_t(exec, 0, numRows), Impl::MatrixMergedEntriesFunctor( rowmap_orig, entries_orig, values_orig, rowmap_out, entries_out, values_out)); } 
@@ -571,7 +404,8 @@ crsMat_t sort_and_merge_matrix(const typename crsMat_t::execution_space& exec, c entries_t entries_out; values_t values_out; - sort_and_merge_matrix(exec, A.graph.row_map, A.graph.entries, A.values, rowmap_out, entries_out, values_out); + sort_and_merge_matrix(exec, A.graph.row_map, A.graph.entries, A.values, rowmap_out, entries_out, values_out, + A.numCols()); return crsMat_t("SortedMerged", A.numRows(), A.numCols(), values_out.extent(0), values_out, rowmap_out, entries_out); } @@ -584,23 +418,29 @@ crsMat_t sort_and_merge_matrix(const crsMat_t& A) { template void sort_and_merge_matrix(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, const values_t& values_in, rowmap_t& rowmap_out, entries_t& entries_out, - values_t& values_out) { - sort_and_merge_matrix(exec_space(), rowmap_in, entries_in, values_in, rowmap_out, entries_out, values_out); + values_t& values_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + sort_and_merge_matrix(exec_space(), rowmap_in, entries_in, values_in, rowmap_out, entries_out, values_out, numCols); } template void sort_and_merge_matrix(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, const values_t& values_in, rowmap_t& rowmap_out, entries_t& entries_out, - values_t& values_out) { + values_t& values_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { sort_and_merge_matrix(typename entries_t::execution_space(), rowmap_in, entries_in, values_in, rowmap_out, - entries_out, values_out); + entries_out, values_out, numCols); } template void sort_and_merge_graph(const exec_space& exec, const typename rowmap_t::const_type& rowmap_in, - const entries_t& entries_in, rowmap_t& rowmap_out, entries_t& entries_out) { - using size_type = typename rowmap_t::non_const_value_type; - using lno_t = typename entries_t::value_type; + const entries_t& entries_in, rowmap_t& rowmap_out, entries_t& entries_out, + 
typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + using Offset = typename rowmap_t::non_const_value_type; + using Ordinal = typename entries_t::value_type; using range_t = Kokkos::RangePolicy; using nc_rowmap_t = typename rowmap_t::non_const_type; static_assert(Kokkos::SpaceAccessibility::accessible, @@ -612,19 +452,19 @@ void sort_and_merge_graph(const exec_space& exec, const typename rowmap_t::const static_assert(!std::is_const_v, "sort_and_merge_graph: entries_t must not be const-valued"); - lno_t numRows = rowmap_in.extent(0) ? rowmap_in.extent(0) - 1 : 0; + Ordinal numRows = rowmap_in.extent(0) ? rowmap_in.extent(0) - 1 : 0; if (numRows == 0) { rowmap_out = typename rowmap_t::non_const_type("SortedMerged rowmap", rowmap_in.extent(0)); entries_out = entries_t(); return; } // Sort in place - sort_crs_graph(exec, rowmap_in, entries_in); + sort_crs_graph(exec, rowmap_in, entries_in, numCols); // Count entries per row into a new rowmap, in terms of merges that can be // done nc_rowmap_t nc_rowmap_out(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged rowmap"), numRows + 1); - size_type numCompressedEntries = 0; - Kokkos::parallel_reduce(range_t(exec, 0, numRows), + Offset numCompressedEntries = 0; + Kokkos::parallel_reduce("KokkosSparse::Impl::MergedRowmapFunctor", range_t(exec, 0, numRows), Impl::MergedRowmapFunctor(nc_rowmap_out, rowmap_in, entries_in), numCompressedEntries); if (entries_in.extent(0) == size_t(numCompressedEntries)) { @@ -655,107 +495,50 @@ void sort_and_merge_graph(const exec_space& exec, const typename rowmap_t::const entries_out = entries_t(Kokkos::view_alloc(exec, Kokkos::WithoutInitializing, "SortedMerged entries"), numCompressedEntries); // Compute merged entries and values - Kokkos::parallel_for(range_t(exec, 0, numRows), Impl::GraphMergedEntriesFunctor( - rowmap_orig, entries_orig, rowmap_out, entries_out)); + Kokkos::parallel_for( + "KokkosSparse::Impl::GraphMergedEntriesFunctor", 
range_t(exec, 0, numRows), + Impl::GraphMergedEntriesFunctor(rowmap_orig, entries_orig, rowmap_out, entries_out)); } template void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, - rowmap_t& rowmap_out, entries_t& entries_out) { - return sort_and_merge_graph(exec_space(), rowmap_in, entries_in, rowmap_out, entries_out); + rowmap_t& rowmap_out, entries_t& entries_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + return sort_and_merge_graph(exec_space(), rowmap_in, entries_in, rowmap_out, entries_out, numCols); } template void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, - rowmap_t& rowmap_out, entries_t& entries_out) { - return sort_and_merge_graph(typename entries_t::execution_space(), rowmap_in, entries_in, rowmap_out, entries_out); + rowmap_t& rowmap_out, entries_t& entries_out, + typename entries_t::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + return sort_and_merge_graph(typename entries_t::execution_space(), rowmap_in, entries_in, rowmap_out, entries_out, + numCols); } template -crsGraph_t sort_and_merge_graph(const typename crsGraph_t::execution_space& exec, const crsGraph_t& G) { +crsGraph_t sort_and_merge_graph( + const typename crsGraph_t::execution_space& exec, const crsGraph_t& G, + typename crsGraph_t::entries_type::const_value_type& numCols = + Kokkos::ArithTraits::max()) { using rowmap_t = typename crsGraph_t::row_map_type::non_const_type; using entries_t = typename crsGraph_t::entries_type; static_assert(!std::is_const::value, "sort_and_merge_graph requires StaticCrsGraph entries to be non-const."); rowmap_t mergedRowmap; entries_t mergedEntries; - sort_and_merge_graph(exec, G.row_map, G.entries, mergedRowmap, mergedEntries); + sort_and_merge_graph(exec, G.row_map, G.entries, mergedRowmap, mergedEntries, numCols); return crsGraph_t(mergedEntries, mergedRowmap); } template -crsGraph_t 
sort_and_merge_graph(const crsGraph_t& G) { - return sort_and_merge_graph(typename crsGraph_t::execution_space(), G); +crsGraph_t sort_and_merge_graph( + const crsGraph_t& G, typename crsGraph_t::entries_type::const_value_type& numCols = + Kokkos::ArithTraits::max()) { + return sort_and_merge_graph(typename crsGraph_t::execution_space(), G, numCols); } } // namespace KokkosSparse -namespace KokkosKernels { - -// ---------------------------------- -// BSR matrix/graph sorting utilities -// ---------------------------------- - -// Sort a BRS matrix: within each row, sort entries ascending by column and -// permute the values accordingly. -template -[[deprecated]] void sort_bsr_matrix(const lno_t blockdim, const rowmap_t& rowmap, const entries_t& entries, - const values_t& values) { - KokkosSparse::sort_bsr_matrix(blockdim, rowmap, entries, values); -} - -template -[[deprecated]] void sort_bsr_matrix(const bsrMat_t& A) { - KokkosSparse::sort_bsr_matrix(A); -} - -// ---------------------------------- -// CRS matrix/graph sorting utilities -// ---------------------------------- - -// The sort_crs* functions sort the adjacent column list for each row into -// ascending order. - -template -[[deprecated]] void sort_crs_matrix(const rowmap_t& rowmap, const entries_t& entries, const values_t& values) { - KokkosSparse::sort_crs_matrix(rowmap, entries, values); -} - -template -[[deprecated]] void sort_crs_matrix(const crsMat_t& A) { - KokkosSparse::sort_crs_matrix(A); -} - -template -[[deprecated]] void sort_crs_graph(const rowmap_t& rowmap, const entries_t& entries) { - KokkosSparse::sort_crs_graph(rowmap, entries); -} - -template -[[deprecated]] void sort_crs_graph(const crsGraph_t& G) { - KokkosSparse::sort_crs_graph(G); -} - -// sort_and_merge_matrix produces a new matrix which is equivalent to A but is -// sorted and has no duplicated entries: each (i, j) is unique. Values for -// duplicated entries are summed. 
-template -[[deprecated]] crsMat_t sort_and_merge_matrix(const crsMat_t& A) { - KokkosSparse::sort_and_merge_matrix(A); -} - -template -[[deprecated]] crsGraph_t sort_and_merge_graph(const crsGraph_t& G) { - KokkosSparse::sort_and_merge_graph(G); -} - -template -[[deprecated]] void sort_and_merge_graph(const typename rowmap_t::const_type& rowmap_in, const entries_t& entries_in, - rowmap_t& rowmap_out, entries_t& entries_out) { - KokkosSparse::sort_and_merge_graph(rowmap_in, entries_in, rowmap_out, entries_out); -} - -} // namespace KokkosKernels - #endif // _KOKKOSSPARSE_SORTCRS_HPP diff --git a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp index 781857ef551f..d73787481e0e 100644 --- a/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp +++ b/packages/kokkos-kernels/sparse/src/KokkosSparse_Utils.hpp @@ -848,6 +848,19 @@ ordinal_t graph_max_degree(const rowmap_t &rowmap) { return val; } +template +typename rowmap_t::non_const_value_type graph_max_degree(const execution_space &exec, const rowmap_t &rowmap) { + using Offset = typename rowmap_t::non_const_value_type; + using Reducer = Kokkos::Max; + Offset nrows = rowmap.extent(0); + if (nrows) nrows--; + if (nrows == 0) return 0; + Offset val; + Kokkos::parallel_reduce(Kokkos::RangePolicy(exec, 0, nrows), + MaxDegreeFunctor(rowmap), Reducer(val)); + return val; +} + template void graph_min_max_degree(const rowmap_t &rowmap, ordinal_t &min_degree, ordinal_t &max_degree) { using Reducer = Kokkos::MinMax; From f598d5a9d9f3022e25cbb3ad5dbe019d3c5b6717 Mon Sep 17 00:00:00 2001 From: Roger Pawlowski Date: Tue, 27 Aug 2024 12:36:56 -0600 Subject: [PATCH 14/23] Phalanx: updates for c++20 c++20 added a new keyword "requires" for concepts. Phalanx had a function on a templated class called requires... 
--- .../test/DagManager/DagManagerTest.cpp | 66 +++++++++---------- .../EvaluatorMacros/EvaluatorMacrosTest.cpp | 8 +-- .../EvaluatorMacros/EvaluatorWithMacros.hpp | 4 +- .../EvaluatorWithMacros_Def.hpp | 4 +- .../test/Utilities/Evaluator_MockDAG.hpp | 2 +- .../test/Utilities/Evaluator_MockDAG_Def.hpp | 4 +- 6 files changed, 44 insertions(+), 44 deletions(-) diff --git a/packages/phalanx/test/DagManager/DagManagerTest.cpp b/packages/phalanx/test/DagManager/DagManagerTest.cpp index 34b9dc36caca..ffee37324306 100644 --- a/packages/phalanx/test/DagManager/DagManagerTest.cpp +++ b/packages/phalanx/test/DagManager/DagManagerTest.cpp @@ -50,8 +50,8 @@ void registerDagNodes(PHX::DagManager& em, RCP a = rcp(new Mock); a->setName("Eval_A"); a->evaluates("A"); - a->requires("B"); - a->requires("C"); + a->depends("B"); + a->depends("C"); em.registerEvaluator(a); } @@ -60,7 +60,7 @@ void registerDagNodes(PHX::DagManager& em, b->setName("Eval_B"); b->evaluates("B"); b->evaluates("D"); - b->requires("E"); + b->depends("E"); em.registerEvaluator(b); } @@ -68,7 +68,7 @@ void registerDagNodes(PHX::DagManager& em, RCP c = rcp(new Mock); c->setName("Eval_C"); c->evaluates("C"); - c->requires("E"); + c->depends("E"); em.registerEvaluator(c); } @@ -77,7 +77,7 @@ void registerDagNodes(PHX::DagManager& em, e->setName("Eval_E"); e->evaluates("E"); if (addCircularDependency) - e->requires("D"); + e->depends("D"); em.registerEvaluator(e); } @@ -86,7 +86,7 @@ void registerDagNodes(PHX::DagManager& em, RCP c = rcp(new Mock); c->setName("DUPLICATE Eval_C"); c->evaluates("C"); - c->requires("E"); + c->depends("E"); em.registerEvaluator(c); } } @@ -342,22 +342,22 @@ TEUCHOS_UNIT_TEST(dag, analyze_graph2) RCP m = rcp(new Mock); m->setName("Eval_A"); m->evaluates("A"); - m->requires("B"); - m->requires("C"); + m->depends("B"); + m->depends("C"); dag.registerEvaluator(m); } { RCP m = rcp(new Mock); m->setName("Eval_B"); m->evaluates("B"); - m->requires("D"); + m->depends("D"); 
dag.registerEvaluator(m); } { RCP m = rcp(new Mock); m->setName("Eval_C"); m->evaluates("C"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { @@ -469,8 +469,8 @@ TEUCHOS_UNIT_TEST(dag, contrib_and_eval_B) RCP m = rcp(new Mock); m->setName("Eval_A"); m->evaluates("A"); - m->requires("B"); - m->requires("C"); + m->depends("B"); + m->depends("C"); dag.registerEvaluator(m); } { @@ -483,7 +483,7 @@ TEUCHOS_UNIT_TEST(dag, contrib_and_eval_B) RCP m = rcp(new Mock); m->setName("Eval_C"); m->evaluates("C"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { @@ -496,14 +496,14 @@ TEUCHOS_UNIT_TEST(dag, contrib_and_eval_B) RCP m = rcp(new Mock); m->setName("Eval_B+"); m->contributes("B"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { // Contributes to B also RCP m = rcp(new Mock); m->setName("Eval_B++"); m->contributes("B"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } @@ -572,15 +572,15 @@ TEUCHOS_UNIT_TEST(dag, contrib_only_B) RCP m = rcp(new Mock); m->setName("Eval_A"); m->evaluates("A"); - m->requires("B"); - m->requires("C"); + m->depends("B"); + m->depends("C"); dag.registerEvaluator(m); } { RCP m = rcp(new Mock); m->setName("Eval_C"); m->evaluates("C"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { @@ -593,14 +593,14 @@ TEUCHOS_UNIT_TEST(dag, contrib_only_B) RCP m = rcp(new Mock); m->setName("Eval_B+"); m->contributes("B"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } { // Contributes to B also RCP m = rcp(new Mock); m->setName("Eval_B++"); m->contributes("B"); - m->requires("D"); + m->depends("D"); dag.registerEvaluator(m); } @@ -665,7 +665,7 @@ TEUCHOS_UNIT_TEST(dag, alias_field) RCP m = rcp(new Mock); m->setName("Eval_A"); m->evaluates("A"); - m->requires("B"); + m->depends("B"); dag.registerEvaluator(m); } { @@ -746,14 +746,14 @@ TEUCHOS_UNIT_TEST(dag, use_range_and_unshared) RCP e = rcp(new Mock); e->setName("c"); e->evaluates("f3"); - 
e->requires("f2"); + e->depends("f2"); dag.registerEvaluator(e); } { RCP e = rcp(new Mock); e->setName("e"); e->evaluates("f4"); - e->requires("f3"); + e->depends("f3"); dag.registerEvaluator(e); } { @@ -766,7 +766,7 @@ TEUCHOS_UNIT_TEST(dag, use_range_and_unshared) RCP e = rcp(new Mock); e->setName("b"); e->evaluates("f2"); - e->requires("f1"); + e->depends("f1"); e->unshared("f2"); e->unshared("f1"); dag.registerEvaluator(e); @@ -997,7 +997,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_only) RCP e = rcp(new Mock); e->setName("Convection Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1005,7 +1005,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_only) RCP e = rcp(new Mock); e->setName("Diffusion Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1013,7 +1013,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_only) RCP e = rcp(new Mock); e->setName("Reaction Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1023,7 +1023,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_only) // Important that this is "contributes" to catch writing graph // output correctly. 
e->contributes("Scatter",use_dynamic_layout); - e->requires("Residual",use_dynamic_layout); + e->depends("Residual",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1086,7 +1086,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) RCP e = rcp(new Mock); e->setName("Initialize"); e->evaluates("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1094,7 +1094,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) RCP e = rcp(new Mock); e->setName("Convection Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1102,7 +1102,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) RCP e = rcp(new Mock); e->setName("Diffusion Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1110,7 +1110,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) RCP e = rcp(new Mock); e->setName("Reaction Operator"); e->contributes("Residual",use_dynamic_layout); - e->requires("X",use_dynamic_layout); + e->depends("X",use_dynamic_layout); dm.registerEvaluator(e); } @@ -1120,7 +1120,7 @@ TEUCHOS_UNIT_TEST(contrib, basic_contrib_and_evalauted) // Important that this is "contributes" to catch writing graph // output correctly. 
e->contributes("Scatter",use_dynamic_layout); - e->requires("Residual",use_dynamic_layout); + e->depends("Residual",use_dynamic_layout); dm.registerEvaluator(e); } diff --git a/packages/phalanx/test/EvaluatorMacros/EvaluatorMacrosTest.cpp b/packages/phalanx/test/EvaluatorMacros/EvaluatorMacrosTest.cpp index 57081eb2fe88..abb938d199e8 100644 --- a/packages/phalanx/test/EvaluatorMacros/EvaluatorMacrosTest.cpp +++ b/packages/phalanx/test/EvaluatorMacros/EvaluatorMacrosTest.cpp @@ -46,8 +46,8 @@ TEUCHOS_UNIT_TEST(evaluator_macros, basic) RCP a = rcp(new Ev1(*plist_a)); a->setName("Eval_A"); a->evaluates("A"); - a->requires("B"); - a->requires("C"); + a->depends("B"); + a->depends("C"); fm.registerEvaluator(a); } { @@ -55,7 +55,7 @@ TEUCHOS_UNIT_TEST(evaluator_macros, basic) RCP b = rcp(new Ev2(*plist_b)); b->setName("Eval_B"); b->evaluates("B"); - b->requires("D"); + b->depends("D"); fm.registerEvaluator(b); } { @@ -63,7 +63,7 @@ TEUCHOS_UNIT_TEST(evaluator_macros, basic) RCP c = rcp(new Ev2(*plist_c)); c->setName("Eval_C"); c->evaluates("C"); - c->requires("D"); + c->depends("D"); fm.registerEvaluator(c); } { diff --git a/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros.hpp b/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros.hpp index 13badbfcdb06..de1a86677e2b 100644 --- a/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros.hpp +++ b/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros.hpp @@ -19,7 +19,7 @@ namespace PHX { PHX_EVALUATOR_CLASS(EvaluatorWithMacros1) public: void evaluates(const std::string& field_name); - void requires(const std::string& field_name); + void depends(const std::string& field_name); void bindField(const PHX::FieldTag& ft, const std::any& f); PHX_EVALUATOR_CLASS_END @@ -27,7 +27,7 @@ namespace PHX { PHX_EVALUATOR_CLASS_PP(EvaluatorWithMacros2) public: void evaluates(const std::string& field_name); - void requires(const std::string& field_name); + void depends(const std::string& field_name); void bindField(const 
PHX::FieldTag& ft, const std::any& f); PHX_EVALUATOR_CLASS_END diff --git a/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros_Def.hpp b/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros_Def.hpp index 8361a2bc070c..8e35a31a3850 100644 --- a/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros_Def.hpp +++ b/packages/phalanx/test/EvaluatorMacros/EvaluatorWithMacros_Def.hpp @@ -45,7 +45,7 @@ namespace PHX { } template - void EvaluatorWithMacros1::requires(const std::string& n) + void EvaluatorWithMacros1::depends(const std::string& n) { using Teuchos::RCP; using Teuchos::rcp; @@ -105,7 +105,7 @@ namespace PHX { } template - void EvaluatorWithMacros2::requires(const std::string& n) + void EvaluatorWithMacros2::depends(const std::string& n) { using Teuchos::RCP; using Teuchos::rcp; diff --git a/packages/phalanx/test/Utilities/Evaluator_MockDAG.hpp b/packages/phalanx/test/Utilities/Evaluator_MockDAG.hpp index 378406c1ea0b..3981f832352c 100644 --- a/packages/phalanx/test/Utilities/Evaluator_MockDAG.hpp +++ b/packages/phalanx/test/Utilities/Evaluator_MockDAG.hpp @@ -25,7 +25,7 @@ namespace PHX { PHX::FieldManager& fm); void evaluateFields(typename Traits::EvalData d); void evaluates(const std::string& field_name, const bool use_dynamic_layout=false); - void requires(const std::string& field_name, const bool use_dynamic_layout=false); + void depends(const std::string& field_name, const bool use_dynamic_layout=false); void contributes(const std::string& field_name, const bool use_dynamic_layout=false); void unshared(const std::string& field_name); }; diff --git a/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp b/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp index d755b35f50ef..4c6884f8cbdd 100644 --- a/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp +++ b/packages/phalanx/test/Utilities/Evaluator_MockDAG_Def.hpp @@ -48,8 +48,8 @@ namespace PHX { } template - void MockDAG::requires(const std::string& n, - const bool 
use_dynamic_layout) + void MockDAG::depends(const std::string& n, + const bool use_dynamic_layout) { using Teuchos::RCP; using Teuchos::rcp; From aa5fa7a81a842abc709f1dbeccf1a4627ff8ab8d Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Tue, 27 Aug 2024 14:58:03 -0600 Subject: [PATCH 15/23] SEACAS: Update to fmt-11.0 --- .../ioss/src/private_copy_fmt/fmt/args.h | 128 +- .../ioss/src/private_copy_fmt/fmt/base.h | 3077 +++++++++++++++ .../ioss/src/private_copy_fmt/fmt/chrono.h | 1353 ++++--- .../ioss/src/private_copy_fmt/fmt/color.h | 285 +- .../ioss/src/private_copy_fmt/fmt/compile.h | 238 +- .../ioss/src/private_copy_fmt/fmt/core.h | 3346 +---------------- .../src/private_copy_fmt/fmt/format-inl.h | 621 +-- .../ioss/src/private_copy_fmt/fmt/format.h | 2849 +++++++------- .../ioss/src/private_copy_fmt/fmt/os.h | 305 +- .../ioss/src/private_copy_fmt/fmt/ostream.h | 178 +- .../ioss/src/private_copy_fmt/fmt/printf.h | 502 +-- .../ioss/src/private_copy_fmt/fmt/ranges.h | 730 ++-- .../ioss/src/private_copy_fmt/fmt/std.h | 676 +++- .../ioss/src/private_copy_fmt/fmt/xchar.h | 230 +- 14 files changed, 7678 insertions(+), 6840 deletions(-) create mode 100644 packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/base.h diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/args.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/args.h index a3966d140719..31a60e8faf1a 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/args.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/args.h @@ -1,4 +1,4 @@ -// Formatting library for C++ - dynamic format arguments +// Formatting library for C++ - dynamic argument lists // // Copyright (c) 2012 - present, Victor Zverovich // All rights reserved. 
@@ -8,11 +8,13 @@ #ifndef FMT_ARGS_H_ #define FMT_ARGS_H_ -#include // std::reference_wrapper -#include // std::unique_ptr -#include +#ifndef FMT_MODULE +# include // std::reference_wrapper +# include // std::unique_ptr +# include +#endif -#include "core.h" +#include "format.h" // std_string_view FMT_BEGIN_NAMESPACE @@ -22,20 +24,24 @@ template struct is_reference_wrapper : std::false_type {}; template struct is_reference_wrapper> : std::true_type {}; -template const T& unwrap(const T& v) { return v; } -template const T& unwrap(const std::reference_wrapper& v) { +template auto unwrap(const T& v) -> const T& { return v; } +template +auto unwrap(const std::reference_wrapper& v) -> const T& { return static_cast(v); } -class dynamic_arg_list { - // Workaround for clang's -Wweak-vtables. Unlike for regular classes, for - // templates it doesn't complain about inability to deduce single translation - // unit for placing vtable. So storage_node_base is made a fake template. - template struct node { - virtual ~node() = default; - std::unique_ptr> next; - }; +// node is defined outside dynamic_arg_list to workaround a C2504 bug in MSVC +// 2022 (v17.10.0). +// +// Workaround for clang's -Wweak-vtables. Unlike for regular classes, for +// templates it doesn't complain about inability to deduce single translation +// unit for placing vtable. So node is made a fake template. +template struct node { + virtual ~node() = default; + std::unique_ptr> next; +}; +class dynamic_arg_list { template struct typed_node : node<> { T value; @@ -50,7 +56,7 @@ class dynamic_arg_list { std::unique_ptr> head_; public: - template const T& push(const Arg& arg) { + template auto push(const Arg& arg) -> const T& { auto new_node = std::unique_ptr>(new typed_node(arg)); auto& value = new_node->value; new_node->next = std::move(head_); @@ -61,14 +67,10 @@ class dynamic_arg_list { } // namespace detail /** - \rst - A dynamic version of `fmt::format_arg_store`. 
- It's equipped with a storage to potentially temporary objects which lifetimes - could be shorter than the format arguments object. - - It can be implicitly converted into `~fmt::basic_format_args` for passing - into type-erased formatting functions such as `~fmt::vformat`. - \endrst + * A dynamic list of formatting arguments with storage. + * + * It can be implicitly converted into `fmt::basic_format_args` for passing + * into type-erased formatting functions such as `fmt::vformat`. */ template class dynamic_format_arg_store @@ -110,14 +112,14 @@ class dynamic_format_arg_store friend class basic_format_args; - unsigned long long get_types() const { + auto get_types() const -> unsigned long long { return detail::is_unpacked_bit | data_.size() | (named_info_.empty() ? 0ULL : static_cast(detail::has_named_args_bit)); } - const basic_format_arg* data() const { + auto data() const -> const basic_format_arg* { return named_info_.empty() ? data_.data() : data_.data() + 1; } @@ -146,22 +148,20 @@ class dynamic_format_arg_store constexpr dynamic_format_arg_store() = default; /** - \rst - Adds an argument into the dynamic store for later passing to a formatting - function. - - Note that custom types and string types (but not string views) are copied - into the store dynamically allocating memory if necessary. - - **Example**:: - - fmt::dynamic_format_arg_store store; - store.push_back(42); - store.push_back("abc"); - store.push_back(1.5f); - std::string result = fmt::vformat("{} and {} and {}", store); - \endrst - */ + * Adds an argument into the dynamic store for later passing to a formatting + * function. + * + * Note that custom types and string types (but not string views) are copied + * into the store dynamically allocating memory if necessary. 
+ * + * **Example**: + * + * fmt::dynamic_format_arg_store store; + * store.push_back(42); + * store.push_back("abc"); + * store.push_back(1.5f); + * std::string result = fmt::vformat("{} and {} and {}", store); + */ template void push_back(const T& arg) { if (detail::const_check(need_copy::value)) emplace_arg(dynamic_args_.push>(arg)); @@ -170,20 +170,18 @@ class dynamic_format_arg_store } /** - \rst - Adds a reference to the argument into the dynamic store for later passing to - a formatting function. - - **Example**:: - - fmt::dynamic_format_arg_store store; - char band[] = "Rolling Stones"; - store.push_back(std::cref(band)); - band[9] = 'c'; // Changing str affects the output. - std::string result = fmt::vformat("{}", store); - // result == "Rolling Scones" - \endrst - */ + * Adds a reference to the argument into the dynamic store for later passing + * to a formatting function. + * + * **Example**: + * + * fmt::dynamic_format_arg_store store; + * char band[] = "Rolling Stones"; + * store.push_back(std::cref(band)); + * band[9] = 'c'; // Changing str affects the output. + * std::string result = fmt::vformat("{}", store); + * // result == "Rolling Scones" + */ template void push_back(std::reference_wrapper arg) { static_assert( need_copy::value, @@ -192,10 +190,10 @@ class dynamic_format_arg_store } /** - Adds named argument into the dynamic store for later passing to a formatting - function. ``std::reference_wrapper`` is supported to avoid copying of the - argument. The name is always copied into the store. - */ + * Adds named argument into the dynamic store for later passing to a + * formatting function. `std::reference_wrapper` is supported to avoid + * copying of the argument. The name is always copied into the store. + */ template void push_back(const detail::named_arg& arg) { const char_type* arg_name = @@ -208,19 +206,15 @@ class dynamic_format_arg_store } } - /** Erase all elements from the store */ + /// Erase all elements from the store. 
void clear() { data_.clear(); named_info_.clear(); dynamic_args_ = detail::dynamic_arg_list(); } - /** - \rst - Reserves space to store at least *new_cap* arguments including - *new_cap_named* named arguments. - \endrst - */ + /// Reserves space to store at least `new_cap` arguments including + /// `new_cap_named` named arguments. void reserve(size_t new_cap, size_t new_cap_named) { FMT_ASSERT(new_cap >= new_cap_named, "Set of arguments includes set of named arguments"); diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/base.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/base.h new file mode 100644 index 000000000000..6276494253dd --- /dev/null +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/base.h @@ -0,0 +1,3077 @@ +// Formatting library for C++ - the base API for char/UTF-8 +// +// Copyright (c) 2012 - present, Victor Zverovich +// All rights reserved. +// +// For the license information refer to format.h. + +#ifndef FMT_BASE_H_ +#define FMT_BASE_H_ + +#if defined(FMT_IMPORT_STD) && !defined(FMT_MODULE) +# define FMT_MODULE +#endif + +#ifndef FMT_MODULE +# include // CHAR_BIT +# include // FILE +# include // strlen + +// is also included transitively from . +# include // std::byte +# include // std::enable_if +#endif + +// The fmt library version in the form major * 10000 + minor * 100 + patch. +#define FMT_VERSION 110002 + +// Detect compiler versions. 
+#if defined(__clang__) && !defined(__ibmxl__) +# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) +#else +# define FMT_CLANG_VERSION 0 +#endif +#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) +# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) +#else +# define FMT_GCC_VERSION 0 +#endif +#if defined(__ICL) +# define FMT_ICC_VERSION __ICL +#elif defined(__INTEL_COMPILER) +# define FMT_ICC_VERSION __INTEL_COMPILER +#else +# define FMT_ICC_VERSION 0 +#endif +#if defined(_MSC_VER) +# define FMT_MSC_VERSION _MSC_VER +#else +# define FMT_MSC_VERSION 0 +#endif + +// Detect standard library versions. +#ifdef _GLIBCXX_RELEASE +# define FMT_GLIBCXX_RELEASE _GLIBCXX_RELEASE +#else +# define FMT_GLIBCXX_RELEASE 0 +#endif +#ifdef _LIBCPP_VERSION +# define FMT_LIBCPP_VERSION _LIBCPP_VERSION +#else +# define FMT_LIBCPP_VERSION 0 +#endif + +#ifdef _MSVC_LANG +# define FMT_CPLUSPLUS _MSVC_LANG +#else +# define FMT_CPLUSPLUS __cplusplus +#endif + +// Detect __has_*. +#ifdef __has_feature +# define FMT_HAS_FEATURE(x) __has_feature(x) +#else +# define FMT_HAS_FEATURE(x) 0 +#endif +#ifdef __has_include +# define FMT_HAS_INCLUDE(x) __has_include(x) +#else +# define FMT_HAS_INCLUDE(x) 0 +#endif +#ifdef __has_cpp_attribute +# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) +#else +# define FMT_HAS_CPP_ATTRIBUTE(x) 0 +#endif + +#define FMT_HAS_CPP14_ATTRIBUTE(attribute) \ + (FMT_CPLUSPLUS >= 201402L && FMT_HAS_CPP_ATTRIBUTE(attribute)) + +#define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ + (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) + +// Detect C++14 relaxed constexpr. +#ifdef FMT_USE_CONSTEXPR +// Use the provided definition. +#elif FMT_GCC_VERSION >= 600 && FMT_CPLUSPLUS >= 201402L +// GCC only allows throw in constexpr since version 6: +// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67371. 
+# define FMT_USE_CONSTEXPR 1 +#elif FMT_ICC_VERSION +# define FMT_USE_CONSTEXPR 0 // https://github.com/fmtlib/fmt/issues/1628 +#elif FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VERSION >= 1912 +# define FMT_USE_CONSTEXPR 1 +#else +# define FMT_USE_CONSTEXPR 0 +#endif +#if FMT_USE_CONSTEXPR +# define FMT_CONSTEXPR constexpr +#else +# define FMT_CONSTEXPR +#endif + +// Detect consteval, C++20 constexpr extensions and std::is_constant_evaluated. +#if !defined(__cpp_lib_is_constant_evaluated) +# define FMT_USE_CONSTEVAL 0 +#elif FMT_CPLUSPLUS < 201709L +# define FMT_USE_CONSTEVAL 0 +#elif FMT_GLIBCXX_RELEASE && FMT_GLIBCXX_RELEASE < 10 +# define FMT_USE_CONSTEVAL 0 +#elif FMT_LIBCPP_VERSION && FMT_LIBCPP_VERSION < 10000 +# define FMT_USE_CONSTEVAL 0 +#elif defined(__apple_build_version__) && __apple_build_version__ < 14000029L +# define FMT_USE_CONSTEVAL 0 // consteval is broken in Apple clang < 14. +#elif FMT_MSC_VERSION && FMT_MSC_VERSION < 1929 +# define FMT_USE_CONSTEVAL 0 // consteval is broken in MSVC VS2019 < 16.10. +#elif defined(__cpp_consteval) +# define FMT_USE_CONSTEVAL 1 +#elif FMT_GCC_VERSION >= 1002 || FMT_CLANG_VERSION >= 1101 +# define FMT_USE_CONSTEVAL 1 +#else +# define FMT_USE_CONSTEVAL 0 +#endif +#if FMT_USE_CONSTEVAL +# define FMT_CONSTEVAL consteval +# define FMT_CONSTEXPR20 constexpr +#else +# define FMT_CONSTEVAL +# define FMT_CONSTEXPR20 +#endif + +#if defined(FMT_USE_NONTYPE_TEMPLATE_ARGS) +// Use the provided definition. +#elif defined(__NVCOMPILER) +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 0 +#elif FMT_GCC_VERSION >= 903 && FMT_CPLUSPLUS >= 201709L +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 +#elif defined(__cpp_nontype_template_args) && \ + __cpp_nontype_template_args >= 201911L +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 +#elif FMT_CLANG_VERSION >= 1200 && FMT_CPLUSPLUS >= 202002L +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 +#else +# define FMT_USE_NONTYPE_TEMPLATE_ARGS 0 +#endif + +#ifdef FMT_USE_CONCEPTS +// Use the provided definition. 
+#elif defined(__cpp_concepts) +# define FMT_USE_CONCEPTS 1 +#else +# define FMT_USE_CONCEPTS 0 +#endif + +// Check if exceptions are disabled. +#ifdef FMT_EXCEPTIONS +// Use the provided definition. +#elif defined(__GNUC__) && !defined(__EXCEPTIONS) +# define FMT_EXCEPTIONS 0 +#elif FMT_MSC_VERSION && !_HAS_EXCEPTIONS +# define FMT_EXCEPTIONS 0 +#else +# define FMT_EXCEPTIONS 1 +#endif +#if FMT_EXCEPTIONS +# define FMT_TRY try +# define FMT_CATCH(x) catch (x) +#else +# define FMT_TRY if (true) +# define FMT_CATCH(x) if (false) +#endif + +#if FMT_HAS_CPP17_ATTRIBUTE(fallthrough) +# define FMT_FALLTHROUGH [[fallthrough]] +#elif defined(__clang__) +# define FMT_FALLTHROUGH [[clang::fallthrough]] +#elif FMT_GCC_VERSION >= 700 && \ + (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520) +# define FMT_FALLTHROUGH [[gnu::fallthrough]] +#else +# define FMT_FALLTHROUGH +#endif + +// Disable [[noreturn]] on MSVC/NVCC because of bogus unreachable code warnings. +#if FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VERSION && !defined(__NVCC__) +# define FMT_NORETURN [[noreturn]] +#else +# define FMT_NORETURN +#endif + +#ifndef FMT_NODISCARD +# if FMT_HAS_CPP17_ATTRIBUTE(nodiscard) +# define FMT_NODISCARD [[nodiscard]] +# else +# define FMT_NODISCARD +# endif +#endif + +#ifdef FMT_DEPRECATED +// Use the provided definition. +#elif FMT_HAS_CPP14_ATTRIBUTE(deprecated) +# define FMT_DEPRECATED [[deprecated]] +#else +# define FMT_DEPRECATED /* deprecated */ +#endif + +#ifdef FMT_INLINE +// Use the provided definition. +#elif FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_ALWAYS_INLINE inline __attribute__((always_inline)) +#else +# define FMT_ALWAYS_INLINE inline +#endif +// A version of FMT_INLINE to prevent code bloat in debug mode. 
+#ifdef NDEBUG +# define FMT_INLINE FMT_ALWAYS_INLINE +#else +# define FMT_INLINE inline +#endif + +#if FMT_GCC_VERSION || FMT_CLANG_VERSION +# define FMT_VISIBILITY(value) __attribute__((visibility(value))) +#else +# define FMT_VISIBILITY(value) +#endif + +#ifndef FMT_GCC_PRAGMA +// Workaround a _Pragma bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59884 +// and an nvhpc warning: https://github.com/fmtlib/fmt/pull/2582. +# if FMT_GCC_VERSION >= 504 && !defined(__NVCOMPILER) +# define FMT_GCC_PRAGMA(arg) _Pragma(arg) +# else +# define FMT_GCC_PRAGMA(arg) +# endif +#endif + +// GCC < 5 requires this-> in decltype. +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +# define FMT_DECLTYPE_THIS this-> +#else +# define FMT_DECLTYPE_THIS +#endif + +#if FMT_MSC_VERSION +# define FMT_MSC_WARNING(...) __pragma(warning(__VA_ARGS__)) +# define FMT_UNCHECKED_ITERATOR(It) \ + using _Unchecked_type = It // Mark iterator as checked. +#else +# define FMT_MSC_WARNING(...) +# define FMT_UNCHECKED_ITERATOR(It) using unchecked_type = It +#endif + +#ifndef FMT_BEGIN_NAMESPACE +# define FMT_BEGIN_NAMESPACE \ + namespace fmt { \ + inline namespace v11 { +# define FMT_END_NAMESPACE \ + } \ + } +#endif + +#ifndef FMT_EXPORT +# define FMT_EXPORT +# define FMT_BEGIN_EXPORT +# define FMT_END_EXPORT +#endif + +#if !defined(FMT_HEADER_ONLY) && defined(_WIN32) +# if defined(FMT_LIB_EXPORT) +# define FMT_API __declspec(dllexport) +# elif defined(FMT_SHARED) +# define FMT_API __declspec(dllimport) +# endif +#elif defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_API FMT_VISIBILITY("default") +#endif +#ifndef FMT_API +# define FMT_API +#endif + +#ifndef FMT_UNICODE +# define FMT_UNICODE 1 +#endif + +// Check if rtti is available. +#ifndef FMT_USE_RTTI +// __RTTI is for EDG compilers. _CPPRTTI is for MSVC. 
+# if defined(__GXX_RTTI) || FMT_HAS_FEATURE(cxx_rtti) || defined(_CPPRTTI) || \ + defined(__INTEL_RTTI__) || defined(__RTTI) +# define FMT_USE_RTTI 1 +# else +# define FMT_USE_RTTI 0 +# endif +#endif + +#define FMT_FWD(...) static_cast(__VA_ARGS__) + +// Enable minimal optimizations for more compact code in debug mode. +FMT_GCC_PRAGMA("GCC push_options") +#if !defined(__OPTIMIZE__) && !defined(__CUDACC__) +FMT_GCC_PRAGMA("GCC optimize(\"Og\")") +#endif + +FMT_BEGIN_NAMESPACE + +// Implementations of enable_if_t and other metafunctions for older systems. +template +using enable_if_t = typename std::enable_if::type; +template +using conditional_t = typename std::conditional::type; +template using bool_constant = std::integral_constant; +template +using remove_reference_t = typename std::remove_reference::type; +template +using remove_const_t = typename std::remove_const::type; +template +using remove_cvref_t = typename std::remove_cv>::type; +template struct type_identity { + using type = T; +}; +template using type_identity_t = typename type_identity::type; +template +using make_unsigned_t = typename std::make_unsigned::type; +template +using underlying_t = typename std::underlying_type::type; + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 +// A workaround for gcc 4.8 to make void_t work in a SFINAE context. +template struct void_t_impl { + using type = void; +}; +template using void_t = typename void_t_impl::type; +#else +template using void_t = void; +#endif + +struct monostate { + constexpr monostate() {} +}; + +// An enable_if helper to be used in template parameters which results in much +// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed +// to workaround a bug in MSVC 2019 (see #1140 and #1186). +#ifdef FMT_DOC +# define FMT_ENABLE_IF(...) +#else +# define FMT_ENABLE_IF(...) fmt::enable_if_t<(__VA_ARGS__), int> = 0 +#endif + +// This is defined in base.h instead of format.h to avoid injecting in std. 
+// It is a template to avoid undesirable implicit conversions to std::byte. +#ifdef __cpp_lib_byte +template ::value)> +inline auto format_as(T b) -> unsigned char { + return static_cast(b); +} +#endif + +namespace detail { +// Suppresses "unused variable" warnings with the method described in +// https://herbsutter.com/2009/10/18/mailbag-shutting-up-compiler-warnings/. +// (void)var does not work on many Intel compilers. +template FMT_CONSTEXPR void ignore_unused(const T&...) {} + +constexpr auto is_constant_evaluated(bool default_value = false) noexcept + -> bool { +// Workaround for incompatibility between libstdc++ consteval-based +// std::is_constant_evaluated() implementation and clang-14: +// https://github.com/fmtlib/fmt/issues/3247. +#if FMT_CPLUSPLUS >= 202002L && FMT_GLIBCXX_RELEASE >= 12 && \ + (FMT_CLANG_VERSION >= 1400 && FMT_CLANG_VERSION < 1500) + ignore_unused(default_value); + return __builtin_is_constant_evaluated(); +#elif defined(__cpp_lib_is_constant_evaluated) + ignore_unused(default_value); + return std::is_constant_evaluated(); +#else + return default_value; +#endif +} + +// Suppresses "conditional expression is constant" warnings. +template constexpr auto const_check(T value) -> T { return value; } + +FMT_NORETURN FMT_API void assert_fail(const char* file, int line, + const char* message); + +#if defined(FMT_ASSERT) +// Use the provided definition. +#elif defined(NDEBUG) +// FMT_ASSERT is not empty to avoid -Wempty-body. +# define FMT_ASSERT(condition, message) \ + fmt::detail::ignore_unused((condition), (message)) +#else +# define FMT_ASSERT(condition, message) \ + ((condition) /* void() fails with -Winvalid-constexpr on clang 4.0.1 */ \ + ? (void)0 \ + : fmt::detail::assert_fail(__FILE__, __LINE__, (message))) +#endif + +#ifdef FMT_USE_INT128 +// Do nothing. 
+#elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \ + !(FMT_CLANG_VERSION && FMT_MSC_VERSION) +# define FMT_USE_INT128 1 +using int128_opt = __int128_t; // An optional native 128-bit integer. +using uint128_opt = __uint128_t; +template inline auto convert_for_visit(T value) -> T { + return value; +} +#else +# define FMT_USE_INT128 0 +#endif +#if !FMT_USE_INT128 +enum class int128_opt {}; +enum class uint128_opt {}; +// Reduce template instantiations. +template auto convert_for_visit(T) -> monostate { return {}; } +#endif + +// Casts a nonnegative integer to unsigned. +template +FMT_CONSTEXPR auto to_unsigned(Int value) -> make_unsigned_t { + FMT_ASSERT(std::is_unsigned::value || value >= 0, "negative value"); + return static_cast>(value); +} + +// A heuristic to detect std::string and std::[experimental::]string_view. +// It is mainly used to avoid dependency on <[experimental/]string_view>. +template +struct is_std_string_like : std::false_type {}; +template +struct is_std_string_like().find_first_of( + typename T::value_type(), 0))>> + : std::is_convertible().data()), + const typename T::value_type*> {}; + +// Returns true iff the literal encoding is UTF-8. +constexpr auto is_utf8_enabled() -> bool { + // Avoid an MSVC sign extension bug: https://github.com/fmtlib/fmt/pull/2297. 
+ using uchar = unsigned char; + return sizeof("\u00A7") == 3 && uchar("\u00A7"[0]) == 0xC2 && + uchar("\u00A7"[1]) == 0xA7; +} +constexpr auto use_utf8() -> bool { + return !FMT_MSC_VERSION || is_utf8_enabled(); +} + +static_assert(!FMT_UNICODE || use_utf8(), + "Unicode support requires compiling with /utf-8"); + +template FMT_CONSTEXPR auto length(const Char* s) -> size_t { + size_t len = 0; + while (*s++) ++len; + return len; +} + +template +FMT_CONSTEXPR auto compare(const Char* s1, const Char* s2, std::size_t n) + -> int { + if (!is_constant_evaluated() && sizeof(Char) == 1) return memcmp(s1, s2, n); + for (; n != 0; ++s1, ++s2, --n) { + if (*s1 < *s2) return -1; + if (*s1 > *s2) return 1; + } + return 0; +} + +namespace adl { +using namespace std; + +template +auto invoke_back_inserter() + -> decltype(back_inserter(std::declval())); +} // namespace adl + +template +struct is_back_insert_iterator : std::false_type {}; + +template +struct is_back_insert_iterator< + It, bool_constant()), + It>::value>> : std::true_type {}; + +// Extracts a reference to the container from *insert_iterator. +template +inline auto get_container(OutputIt it) -> typename OutputIt::container_type& { + struct accessor : OutputIt { + accessor(OutputIt base) : OutputIt(base) {} + using OutputIt::container; + }; + return *accessor(it).container; +} +} // namespace detail + +// Checks whether T is a container with contiguous storage. +template struct is_contiguous : std::false_type {}; + +/** + * An implementation of `std::basic_string_view` for pre-C++17. It provides a + * subset of the API. `fmt::basic_string_view` is used for format strings even + * if `std::basic_string_view` is available to prevent issues when a library is + * compiled with a different `-std` option than the client code (which is not + * recommended). 
+ */ +FMT_EXPORT +template class basic_string_view { + private: + const Char* data_; + size_t size_; + + public: + using value_type = Char; + using iterator = const Char*; + + constexpr basic_string_view() noexcept : data_(nullptr), size_(0) {} + + /// Constructs a string reference object from a C string and a size. + constexpr basic_string_view(const Char* s, size_t count) noexcept + : data_(s), size_(count) {} + + constexpr basic_string_view(std::nullptr_t) = delete; + + /// Constructs a string reference object from a C string. + FMT_CONSTEXPR20 + basic_string_view(const Char* s) + : data_(s), + size_(detail::const_check(std::is_same::value && + !detail::is_constant_evaluated(false)) + ? strlen(reinterpret_cast(s)) + : detail::length(s)) {} + + /// Constructs a string reference from a `std::basic_string` or a + /// `std::basic_string_view` object. + template ::value&& std::is_same< + typename S::value_type, Char>::value)> + FMT_CONSTEXPR basic_string_view(const S& s) noexcept + : data_(s.data()), size_(s.size()) {} + + /// Returns a pointer to the string data. + constexpr auto data() const noexcept -> const Char* { return data_; } + + /// Returns the string size. 
+ constexpr auto size() const noexcept -> size_t { return size_; } + + constexpr auto begin() const noexcept -> iterator { return data_; } + constexpr auto end() const noexcept -> iterator { return data_ + size_; } + + constexpr auto operator[](size_t pos) const noexcept -> const Char& { + return data_[pos]; + } + + FMT_CONSTEXPR void remove_prefix(size_t n) noexcept { + data_ += n; + size_ -= n; + } + + FMT_CONSTEXPR auto starts_with(basic_string_view sv) const noexcept + -> bool { + return size_ >= sv.size_ && detail::compare(data_, sv.data_, sv.size_) == 0; + } + FMT_CONSTEXPR auto starts_with(Char c) const noexcept -> bool { + return size_ >= 1 && *data_ == c; + } + FMT_CONSTEXPR auto starts_with(const Char* s) const -> bool { + return starts_with(basic_string_view(s)); + } + + // Lexicographically compare this string reference to other. + FMT_CONSTEXPR auto compare(basic_string_view other) const -> int { + size_t str_size = size_ < other.size_ ? size_ : other.size_; + int result = detail::compare(data_, other.data_, str_size); + if (result == 0) + result = size_ == other.size_ ? 0 : (size_ < other.size_ ? -1 : 1); + return result; + } + + FMT_CONSTEXPR friend auto operator==(basic_string_view lhs, + basic_string_view rhs) -> bool { + return lhs.compare(rhs) == 0; + } + friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) != 0; + } + friend auto operator<(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) < 0; + } + friend auto operator<=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) <= 0; + } + friend auto operator>(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) > 0; + } + friend auto operator>=(basic_string_view lhs, basic_string_view rhs) -> bool { + return lhs.compare(rhs) >= 0; + } +}; + +FMT_EXPORT +using string_view = basic_string_view; + +/// Specifies if `T` is a character type. 
Can be specialized by users. +FMT_EXPORT +template struct is_char : std::false_type {}; +template <> struct is_char : std::true_type {}; + +namespace detail { + +// Constructs fmt::basic_string_view from types implicitly convertible +// to it, deducing Char. Explicitly convertible types such as the ones returned +// from FMT_STRING are intentionally excluded. +template ::value)> +constexpr auto to_string_view(const Char* s) -> basic_string_view { + return s; +} +template ::value)> +constexpr auto to_string_view(const T& s) + -> basic_string_view { + return s; +} +template +constexpr auto to_string_view(basic_string_view s) + -> basic_string_view { + return s; +} + +template +struct has_to_string_view : std::false_type {}; +// detail:: is intentional since to_string_view is not an extension point. +template +struct has_to_string_view< + T, void_t()))>> + : std::true_type {}; + +template struct string_literal { + static constexpr Char value[sizeof...(C)] = {C...}; + constexpr operator basic_string_view() const { + return {value, sizeof...(C)}; + } +}; +#if FMT_CPLUSPLUS < 201703L +template +constexpr Char string_literal::value[sizeof...(C)]; +#endif + +enum class type { + none_type, + // Integer types should go first, + int_type, + uint_type, + long_long_type, + ulong_long_type, + int128_type, + uint128_type, + bool_type, + char_type, + last_integer_type = char_type, + // followed by floating-point types. + float_type, + double_type, + long_double_type, + last_numeric_type = long_double_type, + cstring_type, + string_type, + pointer_type, + custom_type +}; + +// Maps core type T to the corresponding type enum constant. 
+template +struct type_constant : std::integral_constant {}; + +#define FMT_TYPE_CONSTANT(Type, constant) \ + template \ + struct type_constant \ + : std::integral_constant {} + +FMT_TYPE_CONSTANT(int, int_type); +FMT_TYPE_CONSTANT(unsigned, uint_type); +FMT_TYPE_CONSTANT(long long, long_long_type); +FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); +FMT_TYPE_CONSTANT(int128_opt, int128_type); +FMT_TYPE_CONSTANT(uint128_opt, uint128_type); +FMT_TYPE_CONSTANT(bool, bool_type); +FMT_TYPE_CONSTANT(Char, char_type); +FMT_TYPE_CONSTANT(float, float_type); +FMT_TYPE_CONSTANT(double, double_type); +FMT_TYPE_CONSTANT(long double, long_double_type); +FMT_TYPE_CONSTANT(const Char*, cstring_type); +FMT_TYPE_CONSTANT(basic_string_view, string_type); +FMT_TYPE_CONSTANT(const void*, pointer_type); + +constexpr auto is_integral_type(type t) -> bool { + return t > type::none_type && t <= type::last_integer_type; +} +constexpr auto is_arithmetic_type(type t) -> bool { + return t > type::none_type && t <= type::last_numeric_type; +} + +constexpr auto set(type rhs) -> int { return 1 << static_cast(rhs); } +constexpr auto in(type t, int set) -> bool { + return ((set >> static_cast(t)) & 1) != 0; +} + +// Bitsets of types. +enum { + sint_set = + set(type::int_type) | set(type::long_long_type) | set(type::int128_type), + uint_set = set(type::uint_type) | set(type::ulong_long_type) | + set(type::uint128_type), + bool_set = set(type::bool_type), + char_set = set(type::char_type), + float_set = set(type::float_type) | set(type::double_type) | + set(type::long_double_type), + string_set = set(type::string_type), + cstring_set = set(type::cstring_type), + pointer_set = set(type::pointer_type) +}; +} // namespace detail + +/// Reports a format error at compile time or, via a `format_error` exception, +/// at runtime. +// This function is intentionally not constexpr to give a compile-time error. 
+FMT_NORETURN FMT_API void report_error(const char* message); + +FMT_DEPRECATED FMT_NORETURN inline void throw_format_error( + const char* message) { + report_error(message); +} + +/// String's character (code unit) type. +template ()))> +using char_t = typename V::value_type; + +/** + * Parsing context consisting of a format string range being parsed and an + * argument counter for automatic indexing. + * You can use the `format_parse_context` type alias for `char` instead. + */ +FMT_EXPORT +template class basic_format_parse_context { + private: + basic_string_view format_str_; + int next_arg_id_; + + FMT_CONSTEXPR void do_check_arg_id(int id); + + public: + using char_type = Char; + using iterator = const Char*; + + explicit constexpr basic_format_parse_context( + basic_string_view format_str, int next_arg_id = 0) + : format_str_(format_str), next_arg_id_(next_arg_id) {} + + /// Returns an iterator to the beginning of the format string range being + /// parsed. + constexpr auto begin() const noexcept -> iterator { + return format_str_.begin(); + } + + /// Returns an iterator past the end of the format string range being parsed. + constexpr auto end() const noexcept -> iterator { return format_str_.end(); } + + /// Advances the begin iterator to `it`. + FMT_CONSTEXPR void advance_to(iterator it) { + format_str_.remove_prefix(detail::to_unsigned(it - begin())); + } + + /// Reports an error if using the manual argument indexing; otherwise returns + /// the next argument index and switches to the automatic indexing. + FMT_CONSTEXPR auto next_arg_id() -> int { + if (next_arg_id_ < 0) { + report_error("cannot switch from manual to automatic argument indexing"); + return 0; + } + int id = next_arg_id_++; + do_check_arg_id(id); + return id; + } + + /// Reports an error if using the automatic argument indexing; otherwise + /// switches to the manual indexing. 
+ FMT_CONSTEXPR void check_arg_id(int id) { + if (next_arg_id_ > 0) { + report_error("cannot switch from automatic to manual argument indexing"); + return; + } + next_arg_id_ = -1; + do_check_arg_id(id); + } + FMT_CONSTEXPR void check_arg_id(basic_string_view) { + next_arg_id_ = -1; + } + FMT_CONSTEXPR void check_dynamic_spec(int arg_id); +}; + +FMT_EXPORT +using format_parse_context = basic_format_parse_context; + +namespace detail { +// A parse context with extra data used only in compile-time checks. +template +class compile_parse_context : public basic_format_parse_context { + private: + int num_args_; + const type* types_; + using base = basic_format_parse_context; + + public: + explicit FMT_CONSTEXPR compile_parse_context( + basic_string_view format_str, int num_args, const type* types, + int next_arg_id = 0) + : base(format_str, next_arg_id), num_args_(num_args), types_(types) {} + + constexpr auto num_args() const -> int { return num_args_; } + constexpr auto arg_type(int id) const -> type { return types_[id]; } + + FMT_CONSTEXPR auto next_arg_id() -> int { + int id = base::next_arg_id(); + if (id >= num_args_) report_error("argument not found"); + return id; + } + + FMT_CONSTEXPR void check_arg_id(int id) { + base::check_arg_id(id); + if (id >= num_args_) report_error("argument not found"); + } + using base::check_arg_id; + + FMT_CONSTEXPR void check_dynamic_spec(int arg_id) { + detail::ignore_unused(arg_id); + if (arg_id < num_args_ && types_ && !is_integral_type(types_[arg_id])) + report_error("width/precision is not integer"); + } +}; + +/// A contiguous memory buffer with an optional growing ability. It is an +/// internal class and shouldn't be used directly, only via `memory_buffer`. +template class buffer { + private: + T* ptr_; + size_t size_; + size_t capacity_; + + using grow_fun = void (*)(buffer& buf, size_t capacity); + grow_fun grow_; + + protected: + // Don't initialize ptr_ since it is not accessed to save a few cycles. 
+ FMT_MSC_WARNING(suppress : 26495) + FMT_CONSTEXPR20 buffer(grow_fun grow, size_t sz) noexcept + : size_(sz), capacity_(sz), grow_(grow) {} + + constexpr buffer(grow_fun grow, T* p = nullptr, size_t sz = 0, + size_t cap = 0) noexcept + : ptr_(p), size_(sz), capacity_(cap), grow_(grow) {} + + FMT_CONSTEXPR20 ~buffer() = default; + buffer(buffer&&) = default; + + /// Sets the buffer data and capacity. + FMT_CONSTEXPR void set(T* buf_data, size_t buf_capacity) noexcept { + ptr_ = buf_data; + capacity_ = buf_capacity; + } + + public: + using value_type = T; + using const_reference = const T&; + + buffer(const buffer&) = delete; + void operator=(const buffer&) = delete; + + auto begin() noexcept -> T* { return ptr_; } + auto end() noexcept -> T* { return ptr_ + size_; } + + auto begin() const noexcept -> const T* { return ptr_; } + auto end() const noexcept -> const T* { return ptr_ + size_; } + + /// Returns the size of this buffer. + constexpr auto size() const noexcept -> size_t { return size_; } + + /// Returns the capacity of this buffer. + constexpr auto capacity() const noexcept -> size_t { return capacity_; } + + /// Returns a pointer to the buffer data (not null-terminated). + FMT_CONSTEXPR auto data() noexcept -> T* { return ptr_; } + FMT_CONSTEXPR auto data() const noexcept -> const T* { return ptr_; } + + /// Clears this buffer. + void clear() { size_ = 0; } + + // Tries resizing the buffer to contain `count` elements. If T is a POD type + // the new elements may not be initialized. + FMT_CONSTEXPR void try_resize(size_t count) { + try_reserve(count); + size_ = count <= capacity_ ? count : capacity_; + } + + // Tries increasing the buffer capacity to `new_capacity`. It can increase the + // capacity by a smaller amount than requested but guarantees there is space + // for at least one additional element either by increasing the capacity or by + // flushing the buffer if it is full. 
+ FMT_CONSTEXPR void try_reserve(size_t new_capacity) { + if (new_capacity > capacity_) grow_(*this, new_capacity); + } + + FMT_CONSTEXPR void push_back(const T& value) { + try_reserve(size_ + 1); + ptr_[size_++] = value; + } + + /// Appends data to the end of the buffer. + template void append(const U* begin, const U* end) { + while (begin != end) { + auto count = to_unsigned(end - begin); + try_reserve(size_ + count); + auto free_cap = capacity_ - size_; + if (free_cap < count) count = free_cap; + // A loop is faster than memcpy on small sizes. + T* out = ptr_ + size_; + for (size_t i = 0; i < count; ++i) out[i] = begin[i]; + size_ += count; + begin += count; + } + } + + template FMT_CONSTEXPR auto operator[](Idx index) -> T& { + return ptr_[index]; + } + template + FMT_CONSTEXPR auto operator[](Idx index) const -> const T& { + return ptr_[index]; + } +}; + +struct buffer_traits { + explicit buffer_traits(size_t) {} + auto count() const -> size_t { return 0; } + auto limit(size_t size) -> size_t { return size; } +}; + +class fixed_buffer_traits { + private: + size_t count_ = 0; + size_t limit_; + + public: + explicit fixed_buffer_traits(size_t limit) : limit_(limit) {} + auto count() const -> size_t { return count_; } + auto limit(size_t size) -> size_t { + size_t n = limit_ > count_ ? limit_ - count_ : 0; + count_ += size; + return size < n ? size : n; + } +}; + +// A buffer that writes to an output iterator when flushed. 
+template +class iterator_buffer : public Traits, public buffer { + private: + OutputIt out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + static FMT_CONSTEXPR void grow(buffer& buf, size_t) { + if (buf.size() == buffer_size) static_cast(buf).flush(); + } + + void flush() { + auto size = this->size(); + this->clear(); + const T* begin = data_; + const T* end = begin + this->limit(size); + while (begin != end) *out_++ = *begin++; + } + + public: + explicit iterator_buffer(OutputIt out, size_t n = buffer_size) + : Traits(n), buffer(grow, data_, 0, buffer_size), out_(out) {} + iterator_buffer(iterator_buffer&& other) noexcept + : Traits(other), + buffer(grow, data_, 0, buffer_size), + out_(other.out_) {} + ~iterator_buffer() { + // Don't crash if flush fails during unwinding. + FMT_TRY { flush(); } + FMT_CATCH(...) {} + } + + auto out() -> OutputIt { + flush(); + return out_; + } + auto count() const -> size_t { return Traits::count() + this->size(); } +}; + +template +class iterator_buffer : public fixed_buffer_traits, + public buffer { + private: + T* out_; + enum { buffer_size = 256 }; + T data_[buffer_size]; + + static FMT_CONSTEXPR void grow(buffer& buf, size_t) { + if (buf.size() == buf.capacity()) + static_cast(buf).flush(); + } + + void flush() { + size_t n = this->limit(this->size()); + if (this->data() == out_) { + out_ += n; + this->set(data_, buffer_size); + } + this->clear(); + } + + public: + explicit iterator_buffer(T* out, size_t n = buffer_size) + : fixed_buffer_traits(n), buffer(grow, out, 0, n), out_(out) {} + iterator_buffer(iterator_buffer&& other) noexcept + : fixed_buffer_traits(other), + buffer(static_cast(other)), + out_(other.out_) { + if (this->data() != out_) { + this->set(data_, buffer_size); + this->clear(); + } + } + ~iterator_buffer() { flush(); } + + auto out() -> T* { + flush(); + return out_; + } + auto count() const -> size_t { + return fixed_buffer_traits::count() + this->size(); + } +}; + +template class 
iterator_buffer : public buffer { + public: + explicit iterator_buffer(T* out, size_t = 0) + : buffer([](buffer&, size_t) {}, out, 0, ~size_t()) {} + + auto out() -> T* { return &*this->end(); } +}; + +// A buffer that writes to a container with the contiguous storage. +template +class iterator_buffer< + OutputIt, + enable_if_t::value && + is_contiguous::value, + typename OutputIt::container_type::value_type>> + : public buffer { + private: + using container_type = typename OutputIt::container_type; + using value_type = typename container_type::value_type; + container_type& container_; + + static FMT_CONSTEXPR void grow(buffer& buf, size_t capacity) { + auto& self = static_cast(buf); + self.container_.resize(capacity); + self.set(&self.container_[0], capacity); + } + + public: + explicit iterator_buffer(container_type& c) + : buffer(grow, c.size()), container_(c) {} + explicit iterator_buffer(OutputIt out, size_t = 0) + : iterator_buffer(get_container(out)) {} + + auto out() -> OutputIt { return back_inserter(container_); } +}; + +// A buffer that counts the number of code units written discarding the output. +template class counting_buffer : public buffer { + private: + enum { buffer_size = 256 }; + T data_[buffer_size]; + size_t count_ = 0; + + static FMT_CONSTEXPR void grow(buffer& buf, size_t) { + if (buf.size() != buffer_size) return; + static_cast(buf).count_ += buf.size(); + buf.clear(); + } + + public: + counting_buffer() : buffer(grow, data_, 0, buffer_size) {} + + auto count() -> size_t { return count_ + this->size(); } +}; +} // namespace detail + +template +FMT_CONSTEXPR void basic_format_parse_context::do_check_arg_id(int id) { + // Argument id is only checked at compile-time during parsing because + // formatting has its own validation. 
+ if (detail::is_constant_evaluated() && + (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) { + using context = detail::compile_parse_context; + if (id >= static_cast(this)->num_args()) + report_error("argument not found"); + } +} + +template +FMT_CONSTEXPR void basic_format_parse_context::check_dynamic_spec( + int arg_id) { + if (detail::is_constant_evaluated() && + (!FMT_GCC_VERSION || FMT_GCC_VERSION >= 1200)) { + using context = detail::compile_parse_context; + static_cast(this)->check_dynamic_spec(arg_id); + } +} + +FMT_EXPORT template class basic_format_arg; +FMT_EXPORT template class basic_format_args; +FMT_EXPORT template class dynamic_format_arg_store; + +// A formatter for objects of type T. +FMT_EXPORT +template +struct formatter { + // A deleted default constructor indicates a disabled formatter. + formatter() = delete; +}; + +// Specifies if T has an enabled formatter specialization. A type can be +// formattable even if it doesn't have a formatter e.g. via a conversion. +template +using has_formatter = + std::is_constructible>; + +// An output iterator that appends to a buffer. It is used instead of +// back_insert_iterator to reduce symbol sizes and avoid dependency. 
+template class basic_appender { + private: + detail::buffer* buffer_; + + friend auto get_container(basic_appender app) -> detail::buffer& { + return *app.buffer_; + } + + public: + using iterator_category = int; + using value_type = T; + using difference_type = ptrdiff_t; + using pointer = T*; + using reference = T&; + using container_type = detail::buffer; + FMT_UNCHECKED_ITERATOR(basic_appender); + + FMT_CONSTEXPR basic_appender(detail::buffer& buf) : buffer_(&buf) {} + + auto operator=(T c) -> basic_appender& { + buffer_->push_back(c); + return *this; + } + auto operator*() -> basic_appender& { return *this; } + auto operator++() -> basic_appender& { return *this; } + auto operator++(int) -> basic_appender { return *this; } +}; + +using appender = basic_appender; + +namespace detail { +template +struct is_back_insert_iterator> : std::true_type {}; + +template +struct locking : std::true_type {}; +template +struct locking>::nonlocking>> + : std::false_type {}; + +template FMT_CONSTEXPR inline auto is_locking() -> bool { + return locking::value; +} +template +FMT_CONSTEXPR inline auto is_locking() -> bool { + return locking::value || is_locking(); +} + +// An optimized version of std::copy with the output value type (T). +template ::value)> +auto copy(InputIt begin, InputIt end, OutputIt out) -> OutputIt { + get_container(out).append(begin, end); + return out; +} + +template ::value)> +FMT_CONSTEXPR auto copy(InputIt begin, InputIt end, OutputIt out) -> OutputIt { + while (begin != end) *out++ = static_cast(*begin++); + return out; +} + +template +FMT_CONSTEXPR auto copy(basic_string_view s, OutputIt out) -> OutputIt { + return copy(s.begin(), s.end(), out); +} + +template +constexpr auto has_const_formatter_impl(T*) + -> decltype(typename Context::template formatter_type().format( + std::declval(), std::declval()), + true) { + return true; +} +template +constexpr auto has_const_formatter_impl(...) 
-> bool { + return false; +} +template +constexpr auto has_const_formatter() -> bool { + return has_const_formatter_impl(static_cast(nullptr)); +} + +template +struct is_buffer_appender : std::false_type {}; +template +struct is_buffer_appender< + It, bool_constant< + is_back_insert_iterator::value && + std::is_base_of, + typename It::container_type>::value>> + : std::true_type {}; + +// Maps an output iterator to a buffer. +template ::value)> +auto get_buffer(OutputIt out) -> iterator_buffer { + return iterator_buffer(out); +} +template ::value)> +auto get_buffer(OutputIt out) -> buffer& { + return get_container(out); +} + +template +auto get_iterator(Buf& buf, OutputIt) -> decltype(buf.out()) { + return buf.out(); +} +template +auto get_iterator(buffer&, OutputIt out) -> OutputIt { + return out; +} + +struct view {}; + +template struct named_arg : view { + const Char* name; + const T& value; + named_arg(const Char* n, const T& v) : name(n), value(v) {} +}; + +template struct named_arg_info { + const Char* name; + int id; +}; + +template struct is_named_arg : std::false_type {}; +template struct is_statically_named_arg : std::false_type {}; + +template +struct is_named_arg> : std::true_type {}; + +template constexpr auto count() -> size_t { return B ? 1 : 0; } +template constexpr auto count() -> size_t { + return (B1 ? 
1 : 0) + count(); +} + +template constexpr auto count_named_args() -> size_t { + return count::value...>(); +} + +template +constexpr auto count_statically_named_args() -> size_t { + return count::value...>(); +} + +struct unformattable {}; +struct unformattable_char : unformattable {}; +struct unformattable_pointer : unformattable {}; + +template struct string_value { + const Char* data; + size_t size; +}; + +template struct named_arg_value { + const named_arg_info* data; + size_t size; +}; + +template struct custom_value { + using parse_context = typename Context::parse_context_type; + void* value; + void (*format)(void* arg, parse_context& parse_ctx, Context& ctx); +}; + +// A formatting argument value. +template class value { + public: + using char_type = typename Context::char_type; + + union { + monostate no_value; + int int_value; + unsigned uint_value; + long long long_long_value; + unsigned long long ulong_long_value; + int128_opt int128_value; + uint128_opt uint128_value; + bool bool_value; + char_type char_value; + float float_value; + double double_value; + long double long_double_value; + const void* pointer; + string_value string; + custom_value custom; + named_arg_value named_args; + }; + + constexpr FMT_ALWAYS_INLINE value() : no_value() {} + constexpr FMT_ALWAYS_INLINE value(int val) : int_value(val) {} + constexpr FMT_ALWAYS_INLINE value(unsigned val) : uint_value(val) {} + constexpr FMT_ALWAYS_INLINE value(long long val) : long_long_value(val) {} + constexpr FMT_ALWAYS_INLINE value(unsigned long long val) + : ulong_long_value(val) {} + FMT_ALWAYS_INLINE value(int128_opt val) : int128_value(val) {} + FMT_ALWAYS_INLINE value(uint128_opt val) : uint128_value(val) {} + constexpr FMT_ALWAYS_INLINE value(float val) : float_value(val) {} + constexpr FMT_ALWAYS_INLINE value(double val) : double_value(val) {} + FMT_ALWAYS_INLINE value(long double val) : long_double_value(val) {} + constexpr FMT_ALWAYS_INLINE value(bool val) : bool_value(val) {} + 
constexpr FMT_ALWAYS_INLINE value(char_type val) : char_value(val) {} + FMT_CONSTEXPR FMT_ALWAYS_INLINE value(const char_type* val) { + string.data = val; + if (is_constant_evaluated()) string.size = {}; + } + FMT_CONSTEXPR FMT_ALWAYS_INLINE value(basic_string_view val) { + string.data = val.data(); + string.size = val.size(); + } + FMT_ALWAYS_INLINE value(const void* val) : pointer(val) {} + FMT_ALWAYS_INLINE value(const named_arg_info* args, size_t size) + : named_args{args, size} {} + + template FMT_CONSTEXPR20 FMT_ALWAYS_INLINE value(T& val) { + using value_type = remove_const_t; + // T may overload operator& e.g. std::vector::reference in libc++. +#if defined(__cpp_if_constexpr) + if constexpr (std::is_same::value) + custom.value = const_cast(&val); +#endif + if (!is_constant_evaluated()) + custom.value = const_cast(&reinterpret_cast(val)); + // Get the formatter type through the context to allow different contexts + // have different extension points, e.g. `formatter` for `format` and + // `printf_formatter` for `printf`. + custom.format = format_custom_arg< + value_type, typename Context::template formatter_type>; + } + value(unformattable); + value(unformattable_char); + value(unformattable_pointer); + + private: + // Formats an argument of a custom type, such as a user-defined class. + template + static void format_custom_arg(void* arg, + typename Context::parse_context_type& parse_ctx, + Context& ctx) { + auto f = Formatter(); + parse_ctx.advance_to(f.parse(parse_ctx)); + using qualified_type = + conditional_t(), const T, T>; + // format must be const for compatibility with std::format and compilation. + const auto& cf = f; + ctx.advance_to(cf.format(*static_cast(arg), ctx)); + } +}; + +// To minimize the number of types we need to deal with, long is translated +// either to int or to long long depending on its size. 
+enum { long_short = sizeof(long) == sizeof(int) }; +using long_type = conditional_t; +using ulong_type = conditional_t; + +template struct format_as_result { + template ::value || std::is_class::value)> + static auto map(U*) -> remove_cvref_t()))>; + static auto map(...) -> void; + + using type = decltype(map(static_cast(nullptr))); +}; +template using format_as_t = typename format_as_result::type; + +template +struct has_format_as + : bool_constant, void>::value> {}; + +#define FMT_MAP_API FMT_CONSTEXPR FMT_ALWAYS_INLINE + +// Maps formatting arguments to core types. +// arg_mapper reports errors by returning unformattable instead of using +// static_assert because it's used in the is_formattable trait. +template struct arg_mapper { + using char_type = typename Context::char_type; + + FMT_MAP_API auto map(signed char val) -> int { return val; } + FMT_MAP_API auto map(unsigned char val) -> unsigned { return val; } + FMT_MAP_API auto map(short val) -> int { return val; } + FMT_MAP_API auto map(unsigned short val) -> unsigned { return val; } + FMT_MAP_API auto map(int val) -> int { return val; } + FMT_MAP_API auto map(unsigned val) -> unsigned { return val; } + FMT_MAP_API auto map(long val) -> long_type { return val; } + FMT_MAP_API auto map(unsigned long val) -> ulong_type { return val; } + FMT_MAP_API auto map(long long val) -> long long { return val; } + FMT_MAP_API auto map(unsigned long long val) -> unsigned long long { + return val; + } + FMT_MAP_API auto map(int128_opt val) -> int128_opt { return val; } + FMT_MAP_API auto map(uint128_opt val) -> uint128_opt { return val; } + FMT_MAP_API auto map(bool val) -> bool { return val; } + + template ::value || + std::is_same::value)> + FMT_MAP_API auto map(T val) -> char_type { + return val; + } + template ::value || +#ifdef __cpp_char8_t + std::is_same::value || +#endif + std::is_same::value || + std::is_same::value) && + !std::is_same::value, + int> = 0> + FMT_MAP_API auto map(T) -> unformattable_char { + return 
{}; + } + + FMT_MAP_API auto map(float val) -> float { return val; } + FMT_MAP_API auto map(double val) -> double { return val; } + FMT_MAP_API auto map(long double val) -> long double { return val; } + + FMT_MAP_API auto map(char_type* val) -> const char_type* { return val; } + FMT_MAP_API auto map(const char_type* val) -> const char_type* { return val; } + template , + FMT_ENABLE_IF(std::is_same::value && + !std::is_pointer::value)> + FMT_MAP_API auto map(const T& val) -> basic_string_view { + return to_string_view(val); + } + template , + FMT_ENABLE_IF(!std::is_same::value && + !std::is_pointer::value)> + FMT_MAP_API auto map(const T&) -> unformattable_char { + return {}; + } + + FMT_MAP_API auto map(void* val) -> const void* { return val; } + FMT_MAP_API auto map(const void* val) -> const void* { return val; } + FMT_MAP_API auto map(volatile void* val) -> const void* { + return const_cast(val); + } + FMT_MAP_API auto map(const volatile void* val) -> const void* { + return const_cast(val); + } + FMT_MAP_API auto map(std::nullptr_t val) -> const void* { return val; } + + // Use SFINAE instead of a const T* parameter to avoid a conflict with the + // array overload. + template < + typename T, + FMT_ENABLE_IF( + std::is_pointer::value || std::is_member_pointer::value || + std::is_function::type>::value || + (std::is_array::value && + !std::is_convertible::value))> + FMT_CONSTEXPR auto map(const T&) -> unformattable_pointer { + return {}; + } + + template ::value)> + FMT_MAP_API auto map(const T (&values)[N]) -> const T (&)[N] { + return values; + } + + // Only map owning types because mapping views can be unsafe. 
+ template , + FMT_ENABLE_IF(std::is_arithmetic::value)> + FMT_MAP_API auto map(const T& val) -> decltype(FMT_DECLTYPE_THIS map(U())) { + return map(format_as(val)); + } + + template > + struct formattable : bool_constant() || + (has_formatter::value && + !std::is_const::value)> {}; + + template ::value)> + FMT_MAP_API auto do_map(T& val) -> T& { + return val; + } + template ::value)> + FMT_MAP_API auto do_map(T&) -> unformattable { + return {}; + } + + // is_fundamental is used to allow formatters for extended FP types. + template , + FMT_ENABLE_IF( + (std::is_class::value || std::is_enum::value || + std::is_union::value || std::is_fundamental::value) && + !has_to_string_view::value && !is_char::value && + !is_named_arg::value && !std::is_integral::value && + !std::is_arithmetic>::value)> + FMT_MAP_API auto map(T& val) -> decltype(FMT_DECLTYPE_THIS do_map(val)) { + return do_map(val); + } + + template ::value)> + FMT_MAP_API auto map(const T& named_arg) + -> decltype(FMT_DECLTYPE_THIS map(named_arg.value)) { + return map(named_arg.value); + } + + auto map(...) -> unformattable { return {}; } +}; + +// A type constant after applying arg_mapper. +template +using mapped_type_constant = + type_constant().map(std::declval())), + typename Context::char_type>; + +enum { packed_arg_bits = 4 }; +// Maximum number of arguments with packed types. +enum { max_packed_args = 62 / packed_arg_bits }; +enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; +enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; + +template +struct is_output_iterator : std::false_type {}; + +template <> struct is_output_iterator : std::true_type {}; + +template +struct is_output_iterator< + It, T, void_t()++ = std::declval())>> + : std::true_type {}; + +// A type-erased reference to an std::locale to avoid a heavy include. +class locale_ref { + private: + const void* locale_; // A type-erased pointer to std::locale. 
+ + public: + constexpr locale_ref() : locale_(nullptr) {} + template explicit locale_ref(const Locale& loc); + + explicit operator bool() const noexcept { return locale_ != nullptr; } + + template auto get() const -> Locale; +}; + +template constexpr auto encode_types() -> unsigned long long { + return 0; +} + +template +constexpr auto encode_types() -> unsigned long long { + return static_cast(mapped_type_constant::value) | + (encode_types() << packed_arg_bits); +} + +template +constexpr unsigned long long make_descriptor() { + return NUM_ARGS <= max_packed_args ? encode_types() + : is_unpacked_bit | NUM_ARGS; +} + +// This type is intentionally undefined, only used for errors. +template +#if FMT_CLANG_VERSION && FMT_CLANG_VERSION <= 1500 +// https://github.com/fmtlib/fmt/issues/3796 +struct type_is_unformattable_for { +}; +#else +struct type_is_unformattable_for; +#endif + +template +FMT_CONSTEXPR auto make_arg(T& val) -> value { + using arg_type = remove_cvref_t().map(val))>; + + // Use enum instead of constexpr because the latter may generate code. + enum { + formattable_char = !std::is_same::value + }; + static_assert(formattable_char, "Mixing character types is disallowed."); + + // Formatting of arbitrary pointers is disallowed. If you want to format a + // pointer cast it to `void*` or `const void*`. In particular, this forbids + // formatting of `[const] volatile char*` printed as bool by iostreams. + enum { + formattable_pointer = !std::is_same::value + }; + static_assert(formattable_pointer, + "Formatting of non-void pointers is disallowed."); + + enum { formattable = !std::is_same::value }; +#if defined(__cpp_if_constexpr) + if constexpr (!formattable) + type_is_unformattable_for _; +#endif + static_assert( + formattable, + "Cannot format an argument. 
To make type T formattable provide a " + "formatter specialization: https://fmt.dev/latest/api.html#udt"); + return {arg_mapper().map(val)}; +} + +template +FMT_CONSTEXPR auto make_arg(T& val) -> basic_format_arg { + auto arg = basic_format_arg(); + arg.type_ = mapped_type_constant::value; + arg.value_ = make_arg(val); + return arg; +} + +template +FMT_CONSTEXPR inline auto make_arg(T& val) -> basic_format_arg { + return make_arg(val); +} + +template +using arg_t = conditional_t, + basic_format_arg>; + +template ::value)> +void init_named_arg(named_arg_info*, int& arg_index, int&, const T&) { + ++arg_index; +} +template ::value)> +void init_named_arg(named_arg_info* named_args, int& arg_index, + int& named_arg_index, const T& arg) { + named_args[named_arg_index++] = {arg.name, arg_index++}; +} + +// An array of references to arguments. It can be implicitly converted to +// `fmt::basic_format_args` for passing into type-erased formatting functions +// such as `fmt::vformat`. +template +struct format_arg_store { + // args_[0].named_args points to named_args to avoid bloating format_args. + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + static constexpr size_t ARGS_ARR_SIZE = 1 + (NUM_ARGS != 0 ? NUM_ARGS : +1); + + arg_t args[ARGS_ARR_SIZE]; + named_arg_info named_args[NUM_NAMED_ARGS]; + + template + FMT_MAP_API format_arg_store(T&... 
values) + : args{{named_args, NUM_NAMED_ARGS}, + make_arg(values)...} { + using dummy = int[]; + int arg_index = 0, named_arg_index = 0; + (void)dummy{ + 0, + (init_named_arg(named_args, arg_index, named_arg_index, values), 0)...}; + } + + format_arg_store(format_arg_store&& rhs) { + args[0] = {named_args, NUM_NAMED_ARGS}; + for (size_t i = 1; i < ARGS_ARR_SIZE; ++i) args[i] = rhs.args[i]; + for (size_t i = 0; i < NUM_NAMED_ARGS; ++i) + named_args[i] = rhs.named_args[i]; + } + + format_arg_store(const format_arg_store& rhs) = delete; + format_arg_store& operator=(const format_arg_store& rhs) = delete; + format_arg_store& operator=(format_arg_store&& rhs) = delete; +}; + +// A specialization of format_arg_store without named arguments. +// It is a plain struct to reduce binary size in debug mode. +template +struct format_arg_store { + // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. + arg_t args[NUM_ARGS != 0 ? NUM_ARGS : +1]; +}; + +} // namespace detail +FMT_BEGIN_EXPORT + +// A formatting argument. Context is a template parameter for the compiled API +// where output can be unbuffered. 
+template class basic_format_arg { + private: + detail::value value_; + detail::type type_; + + template + friend FMT_CONSTEXPR auto detail::make_arg(T& value) + -> basic_format_arg; + + friend class basic_format_args; + friend class dynamic_format_arg_store; + + using char_type = typename Context::char_type; + + template + friend struct detail::format_arg_store; + + basic_format_arg(const detail::named_arg_info* args, size_t size) + : value_(args, size) {} + + public: + class handle { + public: + explicit handle(detail::custom_value custom) : custom_(custom) {} + + void format(typename Context::parse_context_type& parse_ctx, + Context& ctx) const { + custom_.format(custom_.value, parse_ctx, ctx); + } + + private: + detail::custom_value custom_; + }; + + constexpr basic_format_arg() : type_(detail::type::none_type) {} + + constexpr explicit operator bool() const noexcept { + return type_ != detail::type::none_type; + } + + auto type() const -> detail::type { return type_; } + + auto is_integral() const -> bool { return detail::is_integral_type(type_); } + auto is_arithmetic() const -> bool { + return detail::is_arithmetic_type(type_); + } + + /** + * Visits an argument dispatching to the appropriate visit method based on + * the argument type. For example, if the argument type is `double` then + * `vis(value)` will be called with the value of type `double`. 
+ */ + template + FMT_CONSTEXPR FMT_INLINE auto visit(Visitor&& vis) const -> decltype(vis(0)) { + switch (type_) { + case detail::type::none_type: + break; + case detail::type::int_type: + return vis(value_.int_value); + case detail::type::uint_type: + return vis(value_.uint_value); + case detail::type::long_long_type: + return vis(value_.long_long_value); + case detail::type::ulong_long_type: + return vis(value_.ulong_long_value); + case detail::type::int128_type: + return vis(detail::convert_for_visit(value_.int128_value)); + case detail::type::uint128_type: + return vis(detail::convert_for_visit(value_.uint128_value)); + case detail::type::bool_type: + return vis(value_.bool_value); + case detail::type::char_type: + return vis(value_.char_value); + case detail::type::float_type: + return vis(value_.float_value); + case detail::type::double_type: + return vis(value_.double_value); + case detail::type::long_double_type: + return vis(value_.long_double_value); + case detail::type::cstring_type: + return vis(value_.string.data); + case detail::type::string_type: + using sv = basic_string_view; + return vis(sv(value_.string.data, value_.string.size)); + case detail::type::pointer_type: + return vis(value_.pointer); + case detail::type::custom_type: + return vis(typename basic_format_arg::handle(value_.custom)); + } + return vis(monostate()); + } + + auto format_custom(const char_type* parse_begin, + typename Context::parse_context_type& parse_ctx, + Context& ctx) -> bool { + if (type_ != detail::type::custom_type) return false; + parse_ctx.advance_to(parse_begin); + value_.custom.format(value_.custom.value, parse_ctx, ctx); + return true; + } +}; + +template +FMT_DEPRECATED FMT_CONSTEXPR auto visit_format_arg( + Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { + return arg.visit(static_cast(vis)); +} + +/** + * A view of a collection of formatting arguments. 
To avoid lifetime issues it + * should only be used as a parameter type in type-erased functions such as + * `vformat`: + * + * void vlog(fmt::string_view fmt, fmt::format_args args); // OK + * fmt::format_args args = fmt::make_format_args(); // Dangling reference + */ +template class basic_format_args { + public: + using size_type = int; + using format_arg = basic_format_arg; + + private: + // A descriptor that contains information about formatting arguments. + // If the number of arguments is less or equal to max_packed_args then + // argument types are passed in the descriptor. This reduces binary code size + // per formatting function call. + unsigned long long desc_; + union { + // If is_packed() returns true then argument values are stored in values_; + // otherwise they are stored in args_. This is done to improve cache + // locality and reduce compiled code size since storing larger objects + // may require more code (at least on x86-64) even if the same amount of + // data is actually copied to stack. It saves ~10% on the bloat test. + const detail::value* values_; + const format_arg* args_; + }; + + constexpr auto is_packed() const -> bool { + return (desc_ & detail::is_unpacked_bit) == 0; + } + constexpr auto has_named_args() const -> bool { + return (desc_ & detail::has_named_args_bit) != 0; + } + + FMT_CONSTEXPR auto type(int index) const -> detail::type { + int shift = index * detail::packed_arg_bits; + unsigned int mask = (1 << detail::packed_arg_bits) - 1; + return static_cast((desc_ >> shift) & mask); + } + + public: + constexpr basic_format_args() : desc_(0), args_(nullptr) {} + + /// Constructs a `basic_format_args` object from `format_arg_store`. + template + constexpr FMT_ALWAYS_INLINE basic_format_args( + const detail::format_arg_store& + store) + : desc_(DESC), values_(store.args + (NUM_NAMED_ARGS != 0 ? 
1 : 0)) {} + + template detail::max_packed_args)> + constexpr basic_format_args( + const detail::format_arg_store& + store) + : desc_(DESC), args_(store.args + (NUM_NAMED_ARGS != 0 ? 1 : 0)) {} + + /// Constructs a `basic_format_args` object from `dynamic_format_arg_store`. + constexpr basic_format_args(const dynamic_format_arg_store& store) + : desc_(store.get_types()), args_(store.data()) {} + + /// Constructs a `basic_format_args` object from a dynamic list of arguments. + constexpr basic_format_args(const format_arg* args, int count) + : desc_(detail::is_unpacked_bit | detail::to_unsigned(count)), + args_(args) {} + + /// Returns the argument with the specified id. + FMT_CONSTEXPR auto get(int id) const -> format_arg { + format_arg arg; + if (!is_packed()) { + if (id < max_size()) arg = args_[id]; + return arg; + } + if (static_cast(id) >= detail::max_packed_args) return arg; + arg.type_ = type(id); + if (arg.type_ == detail::type::none_type) return arg; + arg.value_ = values_[id]; + return arg; + } + + template + auto get(basic_string_view name) const -> format_arg { + int id = get_id(name); + return id >= 0 ? get(id) : format_arg(); + } + + template + FMT_CONSTEXPR auto get_id(basic_string_view name) const -> int { + if (!has_named_args()) return -1; + const auto& named_args = + (is_packed() ? values_[-1] : args_[-1].value_).named_args; + for (size_t i = 0; i < named_args.size; ++i) { + if (named_args.data[i].name == name) return named_args.data[i].id; + } + return -1; + } + + auto max_size() const -> int { + unsigned long long max_packed = detail::max_packed_args; + return static_cast(is_packed() ? max_packed + : desc_ & ~detail::is_unpacked_bit); + } +}; + +// A formatting context. +class context { + private: + appender out_; + basic_format_args args_; + detail::locale_ref loc_; + + public: + /// The character type for the output. 
+ using char_type = char; + + using iterator = appender; + using format_arg = basic_format_arg; + using parse_context_type = basic_format_parse_context; + template using formatter_type = formatter; + + /// Constructs a `basic_format_context` object. References to the arguments + /// are stored in the object so make sure they have appropriate lifetimes. + FMT_CONSTEXPR context(iterator out, basic_format_args ctx_args, + detail::locale_ref loc = {}) + : out_(out), args_(ctx_args), loc_(loc) {} + context(context&&) = default; + context(const context&) = delete; + void operator=(const context&) = delete; + + FMT_CONSTEXPR auto arg(int id) const -> format_arg { return args_.get(id); } + auto arg(string_view name) -> format_arg { return args_.get(name); } + FMT_CONSTEXPR auto arg_id(string_view name) -> int { + return args_.get_id(name); + } + auto args() const -> const basic_format_args& { return args_; } + + // Returns an iterator to the beginning of the output range. + FMT_CONSTEXPR auto out() -> iterator { return out_; } + + // Advances the begin iterator to `it`. + void advance_to(iterator) {} + + FMT_CONSTEXPR auto locale() -> detail::locale_ref { return loc_; } +}; + +template class generic_context; + +// Longer aliases for C++20 compatibility. +template +using basic_format_context = + conditional_t::value, context, + generic_context>; +using format_context = context; + +template +using buffered_context = basic_format_context, Char>; + +template +using is_formattable = bool_constant>() + .map(std::declval()))>::value>; + +#if FMT_USE_CONCEPTS +template +concept formattable = is_formattable, Char>::value; +#endif + +/** + * Constructs an object that stores references to arguments and can be + * implicitly converted to `format_args`. `Context` can be omitted in which case + * it defaults to `format_context`. See `arg` for lifetime considerations. + */ +// Take arguments by lvalue references to avoid some lifetime issues, e.g. 
+// auto args = make_format_args(std::string()); +template (), + unsigned long long DESC = detail::make_descriptor(), + FMT_ENABLE_IF(NUM_NAMED_ARGS == 0)> +constexpr FMT_ALWAYS_INLINE auto make_format_args(T&... args) + -> detail::format_arg_store { + return {{detail::make_arg( + args)...}}; +} + +#ifndef FMT_DOC +template (), + unsigned long long DESC = + detail::make_descriptor() | + static_cast(detail::has_named_args_bit), + FMT_ENABLE_IF(NUM_NAMED_ARGS != 0)> +constexpr auto make_format_args(T&... args) + -> detail::format_arg_store { + return {args...}; +} +#endif + +/** + * Returns a named argument to be used in a formatting function. + * It should only be used in a call to a formatting function or + * `dynamic_format_arg_store::push_back`. + * + * **Example**: + * + * fmt::print("The answer is {answer}.", fmt::arg("answer", 42)); + */ +template +inline auto arg(const Char* name, const T& arg) -> detail::named_arg { + static_assert(!detail::is_named_arg(), "nested named arguments"); + return {name, arg}; +} +FMT_END_EXPORT + +/// An alias for `basic_format_args`. +// A separate type would result in shorter symbols but break ABI compatibility +// between clang and gcc on ARM (#1919). +FMT_EXPORT using format_args = basic_format_args; + +// We cannot use enum classes as bit fields because of a gcc bug, so we put them +// in namespaces instead (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414). +// Additionally, if an underlying type is specified, older gcc incorrectly warns +// that the type is too small. Both bugs are fixed in gcc 9.3. 
+#if FMT_GCC_VERSION && FMT_GCC_VERSION < 903 +# define FMT_ENUM_UNDERLYING_TYPE(type) +#else +# define FMT_ENUM_UNDERLYING_TYPE(type) : type +#endif +namespace align { +enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, left, right, center, + numeric}; +} +using align_t = align::type; +namespace sign { +enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, minus, plus, space}; +} +using sign_t = sign::type; + +namespace detail { + +template +using unsigned_char = typename conditional_t::value, + std::make_unsigned, + type_identity>::type; + +// Character (code unit) type is erased to prevent template bloat. +struct fill_t { + private: + enum { max_size = 4 }; + char data_[max_size] = {' '}; + unsigned char size_ = 1; + + public: + template + FMT_CONSTEXPR void operator=(basic_string_view s) { + auto size = s.size(); + size_ = static_cast(size); + if (size == 1) { + unsigned uchar = static_cast>(s[0]); + data_[0] = static_cast(uchar); + data_[1] = static_cast(uchar >> 8); + return; + } + FMT_ASSERT(size <= max_size, "invalid fill"); + for (size_t i = 0; i < size; ++i) data_[i] = static_cast(s[i]); + } + + FMT_CONSTEXPR void operator=(char c) { + data_[0] = c; + size_ = 1; + } + + constexpr auto size() const -> size_t { return size_; } + + template constexpr auto get() const -> Char { + using uchar = unsigned char; + return static_cast(static_cast(data_[0]) | + (static_cast(data_[1]) << 8)); + } + + template ::value)> + constexpr auto data() const -> const Char* { + return data_; + } + template ::value)> + constexpr auto data() const -> const Char* { + return nullptr; + } +}; +} // namespace detail + +enum class presentation_type : unsigned char { + // Common specifiers: + none = 0, + debug = 1, // '?' 
+ string = 2, // 's' (string, bool) + + // Integral, bool and character specifiers: + dec = 3, // 'd' + hex, // 'x' or 'X' + oct, // 'o' + bin, // 'b' or 'B' + chr, // 'c' + + // String and pointer specifiers: + pointer = 3, // 'p' + + // Floating-point specifiers: + exp = 1, // 'e' or 'E' (1 since there is no FP debug presentation) + fixed, // 'f' or 'F' + general, // 'g' or 'G' + hexfloat // 'a' or 'A' +}; + +// Format specifiers for built-in and string types. +struct format_specs { + int width; + int precision; + presentation_type type; + align_t align : 4; + sign_t sign : 3; + bool upper : 1; // An uppercase version e.g. 'X' for 'x'. + bool alt : 1; // Alternate form ('#'). + bool localized : 1; + detail::fill_t fill; + + constexpr format_specs() + : width(0), + precision(-1), + type(presentation_type::none), + align(align::none), + sign(sign::none), + upper(false), + alt(false), + localized(false) {} +}; + +namespace detail { + +enum class arg_id_kind { none, index, name }; + +// An argument reference. +template struct arg_ref { + FMT_CONSTEXPR arg_ref() : kind(arg_id_kind::none), val() {} + + FMT_CONSTEXPR explicit arg_ref(int index) + : kind(arg_id_kind::index), val(index) {} + FMT_CONSTEXPR explicit arg_ref(basic_string_view name) + : kind(arg_id_kind::name), val(name) {} + + FMT_CONSTEXPR auto operator=(int idx) -> arg_ref& { + kind = arg_id_kind::index; + val.index = idx; + return *this; + } + + arg_id_kind kind; + union value { + FMT_CONSTEXPR value(int idx = 0) : index(idx) {} + FMT_CONSTEXPR value(basic_string_view n) : name(n) {} + + int index; + basic_string_view name; + } val; +}; + +// Format specifiers with width and precision resolved at formatting rather +// than parsing time to allow reusing the same parsed specifiers with +// different sets of arguments (precompilation of format strings). +template struct dynamic_format_specs : format_specs { + arg_ref width_ref; + arg_ref precision_ref; +}; + +// Converts a character to ASCII. 
Returns '\0' on conversion failure. +template ::value)> +constexpr auto to_ascii(Char c) -> char { + return c <= 0xff ? static_cast(c) : '\0'; +} + +// Returns the number of code units in a code point or 1 on error. +template +FMT_CONSTEXPR auto code_point_length(const Char* begin) -> int { + if (const_check(sizeof(Char) != 1)) return 1; + auto c = static_cast(*begin); + return static_cast((0x3a55000000000000ull >> (2 * (c >> 3))) & 0x3) + 1; +} + +// Return the result via the out param to workaround gcc bug 77539. +template +FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr& out) -> bool { + for (out = first; out != last; ++out) { + if (*out == value) return true; + } + return false; +} + +template <> +inline auto find(const char* first, const char* last, char value, + const char*& out) -> bool { + out = + static_cast(memchr(first, value, to_unsigned(last - first))); + return out != nullptr; +} + +// Parses the range [begin, end) as an unsigned integer. This function assumes +// that the range is non-empty and the first character is a digit. +template +FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end, + int error_value) noexcept -> int { + FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); + unsigned value = 0, prev = 0; + auto p = begin; + do { + prev = value; + value = value * 10 + unsigned(*p - '0'); + ++p; + } while (p != end && '0' <= *p && *p <= '9'); + auto num_digits = p - begin; + begin = p; + int digits10 = static_cast(sizeof(int) * CHAR_BIT * 3 / 10); + if (num_digits <= digits10) return static_cast(value); + // Check for overflow. + unsigned max = INT_MAX; + return num_digits == digits10 + 1 && + prev * 10ull + unsigned(p[-1] - '0') <= max + ? 
static_cast(value) + : error_value; +} + +FMT_CONSTEXPR inline auto parse_align(char c) -> align_t { + switch (c) { + case '<': + return align::left; + case '>': + return align::right; + case '^': + return align::center; + } + return align::none; +} + +template constexpr auto is_name_start(Char c) -> bool { + return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '_'; +} + +template +FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + Char c = *begin; + if (c >= '0' && c <= '9') { + int index = 0; + if (c != '0') + index = parse_nonnegative_int(begin, end, INT_MAX); + else + ++begin; + if (begin == end || (*begin != '}' && *begin != ':')) + report_error("invalid format string"); + else + handler.on_index(index); + return begin; + } + if (!is_name_start(c)) { + report_error("invalid format string"); + return begin; + } + auto it = begin; + do { + ++it; + } while (it != end && (is_name_start(*it) || ('0' <= *it && *it <= '9'))); + handler.on_name({begin, to_unsigned(it - begin)}); + return it; +} + +template +FMT_CONSTEXPR auto parse_arg_id(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + FMT_ASSERT(begin != end, ""); + Char c = *begin; + if (c != '}' && c != ':') return do_parse_arg_id(begin, end, handler); + handler.on_auto(); + return begin; +} + +template struct dynamic_spec_id_handler { + basic_format_parse_context& ctx; + arg_ref& ref; + + FMT_CONSTEXPR void on_auto() { + int id = ctx.next_arg_id(); + ref = arg_ref(id); + ctx.check_dynamic_spec(id); + } + FMT_CONSTEXPR void on_index(int id) { + ref = arg_ref(id); + ctx.check_arg_id(id); + ctx.check_dynamic_spec(id); + } + FMT_CONSTEXPR void on_name(basic_string_view id) { + ref = arg_ref(id); + ctx.check_arg_id(id); + } +}; + +// Parses [integer | "{" [arg_id] "}"]. 
+template +FMT_CONSTEXPR auto parse_dynamic_spec(const Char* begin, const Char* end, + int& value, arg_ref& ref, + basic_format_parse_context& ctx) + -> const Char* { + FMT_ASSERT(begin != end, ""); + if ('0' <= *begin && *begin <= '9') { + int val = parse_nonnegative_int(begin, end, -1); + if (val != -1) + value = val; + else + report_error("number is too big"); + } else if (*begin == '{') { + ++begin; + auto handler = dynamic_spec_id_handler{ctx, ref}; + if (begin != end) begin = parse_arg_id(begin, end, handler); + if (begin != end && *begin == '}') return ++begin; + report_error("invalid format string"); + } + return begin; +} + +template +FMT_CONSTEXPR auto parse_precision(const Char* begin, const Char* end, + int& value, arg_ref& ref, + basic_format_parse_context& ctx) + -> const Char* { + ++begin; + if (begin == end || *begin == '}') { + report_error("invalid precision"); + return begin; + } + return parse_dynamic_spec(begin, end, value, ref, ctx); +} + +enum class state { start, align, sign, hash, zero, width, precision, locale }; + +// Parses standard format specifiers. +template +FMT_CONSTEXPR auto parse_format_specs(const Char* begin, const Char* end, + dynamic_format_specs& specs, + basic_format_parse_context& ctx, + type arg_type) -> const Char* { + auto c = '\0'; + if (end - begin > 1) { + auto next = to_ascii(begin[1]); + c = parse_align(next) == align::none ? 
to_ascii(*begin) : '\0'; + } else { + if (begin == end) return begin; + c = to_ascii(*begin); + } + + struct { + state current_state = state::start; + FMT_CONSTEXPR void operator()(state s, bool valid = true) { + if (current_state >= s || !valid) + report_error("invalid format specifier"); + current_state = s; + } + } enter_state; + + using pres = presentation_type; + constexpr auto integral_set = sint_set | uint_set | bool_set | char_set; + struct { + const Char*& begin; + dynamic_format_specs& specs; + type arg_type; + + FMT_CONSTEXPR auto operator()(pres pres_type, int set) -> const Char* { + if (!in(arg_type, set)) { + if (arg_type == type::none_type) return begin; + report_error("invalid format specifier"); + } + specs.type = pres_type; + return begin + 1; + } + } parse_presentation_type{begin, specs, arg_type}; + + for (;;) { + switch (c) { + case '<': + case '>': + case '^': + enter_state(state::align); + specs.align = parse_align(c); + ++begin; + break; + case '+': + case '-': + case ' ': + if (arg_type == type::none_type) return begin; + enter_state(state::sign, in(arg_type, sint_set | float_set)); + switch (c) { + case '+': + specs.sign = sign::plus; + break; + case '-': + specs.sign = sign::minus; + break; + case ' ': + specs.sign = sign::space; + break; + } + ++begin; + break; + case '#': + if (arg_type == type::none_type) return begin; + enter_state(state::hash, is_arithmetic_type(arg_type)); + specs.alt = true; + ++begin; + break; + case '0': + enter_state(state::zero); + if (!is_arithmetic_type(arg_type)) { + if (arg_type == type::none_type) return begin; + report_error("format specifier requires numeric argument"); + } + if (specs.align == align::none) { + // Ignore 0 if align is specified for compatibility with std::format. 
+ specs.align = align::numeric; + specs.fill = '0'; + } + ++begin; + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + case '{': + enter_state(state::width); + begin = parse_dynamic_spec(begin, end, specs.width, specs.width_ref, ctx); + break; + case '.': + if (arg_type == type::none_type) return begin; + enter_state(state::precision, + in(arg_type, float_set | string_set | cstring_set)); + begin = parse_precision(begin, end, specs.precision, specs.precision_ref, + ctx); + break; + case 'L': + if (arg_type == type::none_type) return begin; + enter_state(state::locale, is_arithmetic_type(arg_type)); + specs.localized = true; + ++begin; + break; + case 'd': + return parse_presentation_type(pres::dec, integral_set); + case 'X': + specs.upper = true; + FMT_FALLTHROUGH; + case 'x': + return parse_presentation_type(pres::hex, integral_set); + case 'o': + return parse_presentation_type(pres::oct, integral_set); + case 'B': + specs.upper = true; + FMT_FALLTHROUGH; + case 'b': + return parse_presentation_type(pres::bin, integral_set); + case 'E': + specs.upper = true; + FMT_FALLTHROUGH; + case 'e': + return parse_presentation_type(pres::exp, float_set); + case 'F': + specs.upper = true; + FMT_FALLTHROUGH; + case 'f': + return parse_presentation_type(pres::fixed, float_set); + case 'G': + specs.upper = true; + FMT_FALLTHROUGH; + case 'g': + return parse_presentation_type(pres::general, float_set); + case 'A': + specs.upper = true; + FMT_FALLTHROUGH; + case 'a': + return parse_presentation_type(pres::hexfloat, float_set); + case 'c': + if (arg_type == type::bool_type) report_error("invalid format specifier"); + return parse_presentation_type(pres::chr, integral_set); + case 's': + return parse_presentation_type(pres::string, + bool_set | string_set | cstring_set); + case 'p': + return parse_presentation_type(pres::pointer, pointer_set | cstring_set); + case '?': + return parse_presentation_type(pres::debug, + 
char_set | string_set | cstring_set); + case '}': + return begin; + default: { + if (*begin == '}') return begin; + // Parse fill and alignment. + auto fill_end = begin + code_point_length(begin); + if (end - fill_end <= 0) { + report_error("invalid format specifier"); + return begin; + } + if (*begin == '{') { + report_error("invalid fill character '{'"); + return begin; + } + auto align = parse_align(to_ascii(*fill_end)); + enter_state(state::align, align != align::none); + specs.fill = + basic_string_view(begin, to_unsigned(fill_end - begin)); + specs.align = align; + begin = fill_end + 1; + } + } + if (begin == end) return begin; + c = to_ascii(*begin); + } +} + +template +FMT_CONSTEXPR auto parse_replacement_field(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + struct id_adapter { + Handler& handler; + int arg_id; + + FMT_CONSTEXPR void on_auto() { arg_id = handler.on_arg_id(); } + FMT_CONSTEXPR void on_index(int id) { arg_id = handler.on_arg_id(id); } + FMT_CONSTEXPR void on_name(basic_string_view id) { + arg_id = handler.on_arg_id(id); + } + }; + + ++begin; + if (begin == end) return handler.on_error("invalid format string"), end; + if (*begin == '}') { + handler.on_replacement_field(handler.on_arg_id(), begin); + } else if (*begin == '{') { + handler.on_text(begin, begin + 1); + } else { + auto adapter = id_adapter{handler, 0}; + begin = parse_arg_id(begin, end, adapter); + Char c = begin != end ? 
*begin : Char(); + if (c == '}') { + handler.on_replacement_field(adapter.arg_id, begin); + } else if (c == ':') { + begin = handler.on_format_specs(adapter.arg_id, begin + 1, end); + if (begin == end || *begin != '}') + return handler.on_error("unknown format specifier"), end; + } else { + return handler.on_error("missing '}' in format string"), end; + } + } + return begin + 1; +} + +template +FMT_CONSTEXPR void parse_format_string(basic_string_view format_str, + Handler&& handler) { + auto begin = format_str.data(); + auto end = begin + format_str.size(); + if (end - begin < 32) { + // Use a simple loop instead of memchr for small strings. + const Char* p = begin; + while (p != end) { + auto c = *p++; + if (c == '{') { + handler.on_text(begin, p - 1); + begin = p = parse_replacement_field(p - 1, end, handler); + } else if (c == '}') { + if (p == end || *p != '}') + return handler.on_error("unmatched '}' in format string"); + handler.on_text(begin, p); + begin = ++p; + } + } + handler.on_text(begin, end); + return; + } + struct writer { + FMT_CONSTEXPR void operator()(const Char* from, const Char* to) { + if (from == to) return; + for (;;) { + const Char* p = nullptr; + if (!find(from, to, Char('}'), p)) + return handler_.on_text(from, to); + ++p; + if (p == to || *p != '}') + return handler_.on_error("unmatched '}' in format string"); + handler_.on_text(from, p); + from = p + 1; + } + } + Handler& handler_; + } write = {handler}; + while (begin != end) { + // Doing two passes with memchr (one for '{' and another for '}') is up to + // 2.5x faster than the naive one-pass implementation on big format strings. 
+ const Char* p = begin; + if (*begin != '{' && !find(begin + 1, end, Char('{'), p)) + return write(begin, end); + write(begin, p); + begin = parse_replacement_field(p, end, handler); + } +} + +template ::value> struct strip_named_arg { + using type = T; +}; +template struct strip_named_arg { + using type = remove_cvref_t; +}; + +template +FMT_VISIBILITY("hidden") // Suppress an ld warning on macOS (#3769). +FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx) + -> decltype(ctx.begin()) { + using char_type = typename ParseContext::char_type; + using context = buffered_context; + using mapped_type = conditional_t< + mapped_type_constant::value != type::custom_type, + decltype(arg_mapper().map(std::declval())), + typename strip_named_arg::type>; +#if defined(__cpp_if_constexpr) + if constexpr (std::is_default_constructible< + formatter>::value) { + return formatter().parse(ctx); + } else { + type_is_unformattable_for _; + return ctx.begin(); + } +#else + return formatter().parse(ctx); +#endif +} + +// Checks char specs and returns true iff the presentation type is char-like. +FMT_CONSTEXPR inline auto check_char_specs(const format_specs& specs) -> bool { + if (specs.type != presentation_type::none && + specs.type != presentation_type::chr && + specs.type != presentation_type::debug) { + return false; + } + if (specs.align == align::numeric || specs.sign != sign::none || specs.alt) + report_error("invalid format specifier for char"); + return true; +} + +#if FMT_USE_NONTYPE_TEMPLATE_ARGS +template +constexpr auto get_arg_index_by_name(basic_string_view name) -> int { + if constexpr (is_statically_named_arg()) { + if (name == T::name) return N; + } + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name(name); + (void)name; // Workaround an MSVC bug about "unused" parameter. 
+ return -1; +} +#endif + +template +FMT_CONSTEXPR auto get_arg_index_by_name(basic_string_view name) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS + if constexpr (sizeof...(Args) > 0) + return get_arg_index_by_name<0, Args...>(name); +#endif + (void)name; + return -1; +} + +template class format_string_checker { + private: + using parse_context_type = compile_parse_context; + static constexpr int num_args = sizeof...(Args); + + // Format specifier parsing function. + // In the future basic_format_parse_context will replace compile_parse_context + // here and will use is_constant_evaluated and downcasting to access the data + // needed for compile-time checks: https://godbolt.org/z/GvWzcTjh1. + using parse_func = const Char* (*)(parse_context_type&); + + type types_[num_args > 0 ? static_cast(num_args) : 1]; + parse_context_type context_; + parse_func parse_funcs_[num_args > 0 ? static_cast(num_args) : 1]; + + public: + explicit FMT_CONSTEXPR format_string_checker(basic_string_view fmt) + : types_{mapped_type_constant>::value...}, + context_(fmt, num_args, types_), + parse_funcs_{&parse_format_specs...} {} + + FMT_CONSTEXPR void on_text(const Char*, const Char*) {} + + FMT_CONSTEXPR auto on_arg_id() -> int { return context_.next_arg_id(); } + FMT_CONSTEXPR auto on_arg_id(int id) -> int { + return context_.check_arg_id(id), id; + } + FMT_CONSTEXPR auto on_arg_id(basic_string_view id) -> int { +#if FMT_USE_NONTYPE_TEMPLATE_ARGS + auto index = get_arg_index_by_name(id); + if (index < 0) on_error("named argument is not found"); + return index; +#else + (void)id; + on_error("compile-time checks for named arguments require C++20 support"); + return 0; +#endif + } + + FMT_CONSTEXPR void on_replacement_field(int id, const Char* begin) { + on_format_specs(id, begin, begin); // Call parse() on empty specs. 
+ } + + FMT_CONSTEXPR auto on_format_specs(int id, const Char* begin, const Char*) + -> const Char* { + context_.advance_to(begin); + // id >= 0 check is a workaround for gcc 10 bug (#2065). + return id >= 0 && id < num_args ? parse_funcs_[id](context_) : begin; + } + + FMT_NORETURN FMT_CONSTEXPR void on_error(const char* message) { + report_error(message); + } +}; + +// A base class for compile-time strings. +struct compile_string {}; + +template +using is_compile_string = std::is_base_of; + +// Reports a compile-time error if S is not a valid format string. +template ::value)> +FMT_ALWAYS_INLINE void check_format_string(const S&) { +#ifdef FMT_ENFORCE_COMPILE_STRING + static_assert(is_compile_string::value, + "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " + "FMT_STRING."); +#endif +} +template ::value)> +void check_format_string(S format_str) { + using char_t = typename S::char_type; + FMT_CONSTEXPR auto s = basic_string_view(format_str); + using checker = format_string_checker...>; + FMT_CONSTEXPR bool error = (parse_format_string(s, checker(s)), true); + ignore_unused(error); +} + +// Report truncation to prevent silent data loss. +inline void report_truncation(bool truncated) { + if (truncated) report_error("output is truncated"); +} + +// Use vformat_args and avoid type_identity to keep symbols short and workaround +// a GCC <= 4.8 bug. 
+template struct vformat_args { + using type = basic_format_args>; +}; +template <> struct vformat_args { + using type = format_args; +}; + +template +void vformat_to(buffer& buf, basic_string_view fmt, + typename vformat_args::type args, locale_ref loc = {}); + +FMT_API void vprint_mojibake(FILE*, string_view, format_args, bool = false); +#ifndef _WIN32 +inline void vprint_mojibake(FILE*, string_view, format_args, bool) {} +#endif + +template struct native_formatter { + private: + dynamic_format_specs specs_; + + public: + using nonlocking = void; + + template + FMT_CONSTEXPR auto parse(ParseContext& ctx) -> const Char* { + if (ctx.begin() == ctx.end() || *ctx.begin() == '}') return ctx.begin(); + auto end = parse_format_specs(ctx.begin(), ctx.end(), specs_, ctx, TYPE); + if (const_check(TYPE == type::char_type)) check_char_specs(specs_); + return end; + } + + template + FMT_CONSTEXPR void set_debug_format(bool set = true) { + specs_.type = set ? presentation_type::debug : presentation_type::none; + } + + template + FMT_CONSTEXPR auto format(const T& val, FormatContext& ctx) const + -> decltype(ctx.out()); +}; +} // namespace detail + +FMT_BEGIN_EXPORT + +// A formatter specialization for natively supported types. +template +struct formatter::value != + detail::type::custom_type>> + : detail::native_formatter::value> { +}; + +template struct runtime_format_string { + basic_string_view str; +}; + +/// A compile-time format string. 
+template class basic_format_string { + private: + basic_string_view str_; + + public: + template < + typename S, + FMT_ENABLE_IF( + std::is_convertible>::value || + (detail::is_compile_string::value && + std::is_constructible, const S&>::value))> + FMT_CONSTEVAL FMT_ALWAYS_INLINE basic_format_string(const S& s) : str_(s) { + static_assert( + detail::count< + (std::is_base_of>::value && + std::is_reference::value)...>() == 0, + "passing views as lvalues is disallowed"); +#if FMT_USE_CONSTEVAL + if constexpr (detail::count_named_args() == + detail::count_statically_named_args()) { + using checker = + detail::format_string_checker...>; + detail::parse_format_string(str_, checker(s)); + } +#else + detail::check_format_string(s); +#endif + } + basic_format_string(runtime_format_string fmt) : str_(fmt.str) {} + + FMT_ALWAYS_INLINE operator basic_string_view() const { return str_; } + auto get() const -> basic_string_view { return str_; } +}; + +#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 +// Workaround broken conversion on older gcc. +template using format_string = string_view; +inline auto runtime(string_view s) -> string_view { return s; } +#else +template +using format_string = basic_format_string...>; +/** + * Creates a runtime format string. + * + * **Example**: + * + * // Check format string at runtime instead of compile-time. + * fmt::print(fmt::runtime("{:d}"), "I am not a number"); + */ +inline auto runtime(string_view s) -> runtime_format_string<> { return {{s}}; } +#endif + +/// Formats a string and writes the output to `out`. +template , + char>::value)> +auto vformat_to(OutputIt&& out, string_view fmt, format_args args) + -> remove_cvref_t { + auto&& buf = detail::get_buffer(out); + detail::vformat_to(buf, fmt, args, {}); + return detail::get_iterator(buf, out); +} + +/** + * Formats `args` according to specifications in `fmt`, writes the result to + * the output iterator `out` and returns the iterator past the end of the output + * range. 
`format_to` does not append a terminating null character. + * + * **Example**: + * + * auto out = std::vector(); + * fmt::format_to(std::back_inserter(out), "{}", 42); + */ +template , + char>::value)> +FMT_INLINE auto format_to(OutputIt&& out, format_string fmt, T&&... args) + -> remove_cvref_t { + return vformat_to(FMT_FWD(out), fmt, fmt::make_format_args(args...)); +} + +template struct format_to_n_result { + /// Iterator past the end of the output range. + OutputIt out; + /// Total (not truncated) output size. + size_t size; +}; + +template ::value)> +auto vformat_to_n(OutputIt out, size_t n, string_view fmt, format_args args) + -> format_to_n_result { + using traits = detail::fixed_buffer_traits; + auto buf = detail::iterator_buffer(out, n); + detail::vformat_to(buf, fmt, args, {}); + return {buf.out(), buf.count()}; +} + +/** + * Formats `args` according to specifications in `fmt`, writes up to `n` + * characters of the result to the output iterator `out` and returns the total + * (not truncated) output size and the iterator past the end of the output + * range. `format_to_n` does not append a terminating null character. + */ +template ::value)> +FMT_INLINE auto format_to_n(OutputIt out, size_t n, format_string fmt, + T&&... args) -> format_to_n_result { + return vformat_to_n(out, n, fmt, fmt::make_format_args(args...)); +} + +template +struct format_to_result { + /// Iterator pointing to just after the last successful write in the range. + OutputIt out; + /// Specifies if the output was truncated. 
+ bool truncated; + + FMT_CONSTEXPR operator OutputIt&() & { + detail::report_truncation(truncated); + return out; + } + FMT_CONSTEXPR operator const OutputIt&() const& { + detail::report_truncation(truncated); + return out; + } + FMT_CONSTEXPR operator OutputIt&&() && { + detail::report_truncation(truncated); + return static_cast(out); + } +}; + +template +auto vformat_to(char (&out)[N], string_view fmt, format_args args) + -> format_to_result { + auto result = vformat_to_n(out, N, fmt, args); + return {result.out, result.size > N}; +} + +template +FMT_INLINE auto format_to(char (&out)[N], format_string fmt, T&&... args) + -> format_to_result { + auto result = fmt::format_to_n(out, N, fmt, static_cast(args)...); + return {result.out, result.size > N}; +} + +/// Returns the number of chars in the output of `format(fmt, args...)`. +template +FMT_NODISCARD FMT_INLINE auto formatted_size(format_string fmt, + T&&... args) -> size_t { + auto buf = detail::counting_buffer<>(); + detail::vformat_to(buf, fmt, fmt::make_format_args(args...), {}); + return buf.count(); +} + +FMT_API void vprint(string_view fmt, format_args args); +FMT_API void vprint(FILE* f, string_view fmt, format_args args); +FMT_API void vprint_buffered(FILE* f, string_view fmt, format_args args); +FMT_API void vprintln(FILE* f, string_view fmt, format_args args); + +/** + * Formats `args` according to specifications in `fmt` and writes the output + * to `stdout`. + * + * **Example**: + * + * fmt::print("The answer is {}.", 42); + */ +template +FMT_INLINE void print(format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + if (!detail::use_utf8()) return detail::vprint_mojibake(stdout, fmt, vargs); + return detail::is_locking() ? vprint_buffered(stdout, fmt, vargs) + : vprint(fmt, vargs); +} + +/** + * Formats `args` according to specifications in `fmt` and writes the + * output to the file `f`. 
+ * + * **Example**: + * + * fmt::print(stderr, "Don't {}!", "panic"); + */ +template +FMT_INLINE void print(FILE* f, format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + if (!detail::use_utf8()) return detail::vprint_mojibake(f, fmt, vargs); + return detail::is_locking() ? vprint_buffered(f, fmt, vargs) + : vprint(f, fmt, vargs); +} + +/// Formats `args` according to specifications in `fmt` and writes the output +/// to the file `f` followed by a newline. +template +FMT_INLINE void println(FILE* f, format_string fmt, T&&... args) { + const auto& vargs = fmt::make_format_args(args...); + return detail::use_utf8() ? vprintln(f, fmt, vargs) + : detail::vprint_mojibake(f, fmt, vargs, true); +} + +/// Formats `args` according to specifications in `fmt` and writes the output +/// to `stdout` followed by a newline. +template +FMT_INLINE void println(format_string fmt, T&&... args) { + return fmt::println(stdout, fmt, static_cast(args)...); +} + +FMT_END_EXPORT +FMT_GCC_PRAGMA("GCC pop_options") +FMT_END_NAMESPACE + +#ifdef FMT_HEADER_ONLY +# include "format.h" +#endif +#endif // FMT_BASE_H_ diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/chrono.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/chrono.h index 1a8d8d04c2aa..c93123fd3353 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/chrono.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/chrono.h @@ -8,20 +8,31 @@ #ifndef FMT_CHRONO_H_ #define FMT_CHRONO_H_ -#include -#include -#include // std::isfinite -#include // std::memcpy -#include -#include -#include -#include -#include +#ifndef FMT_MODULE +# include +# include +# include // std::isfinite +# include // std::memcpy +# include +# include +# include +# include +# include +#endif #include "format.h" FMT_BEGIN_NAMESPACE +// Check if std::chrono::local_t is available. 
+#ifndef FMT_USE_LOCAL_TIME +# ifdef __cpp_lib_chrono +# define FMT_USE_LOCAL_TIME (__cpp_lib_chrono >= 201907L) +# else +# define FMT_USE_LOCAL_TIME 0 +# endif +#endif + // Check if std::chrono::utc_timestamp is available. #ifndef FMT_USE_UTC_TIME # ifdef __cpp_lib_chrono @@ -63,7 +74,8 @@ template ::value && std::numeric_limits::is_signed == std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -84,15 +96,14 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { return static_cast(from); } -/** - * converts From to To, without loss. If the dynamic value of from - * can't be converted to To without loss, ec is set. - */ +/// Converts From to To, without loss. If the dynamic value of from +/// can't be converted to To without loss, ec is set. template ::value && std::numeric_limits::is_signed != std::numeric_limits::is_signed)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; using F = std::numeric_limits; using T = std::numeric_limits; @@ -124,7 +135,8 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { template ::value)> -FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto lossless_integral_conversion(const From from, int& ec) + -> To { ec = 0; return from; } // function @@ -145,7 +157,7 @@ FMT_CONSTEXPR To lossless_integral_conversion(const From from, int& ec) { // clang-format on template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; using T = std::numeric_limits; static_assert(std::is_floating_point::value, "From must be floating"); 
@@ -167,20 +179,18 @@ FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { template ::value)> -FMT_CONSTEXPR To safe_float_conversion(const From from, int& ec) { +FMT_CONSTEXPR auto safe_float_conversion(const From from, int& ec) -> To { ec = 0; static_assert(std::is_floating_point::value, "From must be floating"); return from; } -/** - * safe duration cast between integral durations - */ +/// Safe duration cast between integral durations template ::value), FMT_ENABLE_IF(std::is_integral::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; // the basic idea is that we need to convert from count() in the from type @@ -212,7 +222,8 @@ To safe_duration_cast(std::chrono::duration from, } const auto min1 = (std::numeric_limits::min)() / Factor::num; - if (!std::is_unsigned::value && count < min1) { + if (detail::const_check(!std::is_unsigned::value) && + count < min1) { ec = 1; return {}; } @@ -224,14 +235,12 @@ To safe_duration_cast(std::chrono::duration from, return ec ? To() : To(tocount); } -/** - * safe duration_cast between floating point durations - */ +/// Safe duration_cast between floating point durations template ::value), FMT_ENABLE_IF(std::is_floating_point::value)> -To safe_duration_cast(std::chrono::duration from, - int& ec) { +auto safe_duration_cast(std::chrono::duration from, + int& ec) -> To { using From = std::chrono::duration; ec = 0; if (std::isnan(from.count())) { @@ -311,12 +320,45 @@ To safe_duration_cast(std::chrono::duration from, namespace detail { template struct null {}; -inline null<> localtime_r FMT_NOMACRO(...) { return null<>(); } -inline null<> localtime_s(...) { return null<>(); } -inline null<> gmtime_r(...) { return null<>(); } -inline null<> gmtime_s(...) { return null<>(); } +inline auto localtime_r FMT_NOMACRO(...) -> null<> { return null<>(); } +inline auto localtime_s(...) 
-> null<> { return null<>(); } +inline auto gmtime_r(...) -> null<> { return null<>(); } +inline auto gmtime_s(...) -> null<> { return null<>(); } + +// It is defined here and not in ostream.h because the latter has expensive +// includes. +template class formatbuf : public Streambuf { + private: + using char_type = typename Streambuf::char_type; + using streamsize = decltype(std::declval().sputn(nullptr, 0)); + using int_type = typename Streambuf::int_type; + using traits_type = typename Streambuf::traits_type; + + buffer& buffer_; + + public: + explicit formatbuf(buffer& buf) : buffer_(buf) {} + + protected: + // The put area is always empty. This makes the implementation simpler and has + // the advantage that the streambuf and the buffer are always in sync and + // sputc never writes into uninitialized memory. A disadvantage is that each + // call to sputc always results in a (virtual) call to overflow. There is no + // disadvantage here for sputn since this always results in a call to xsputn. 
+ + auto overflow(int_type ch) -> int_type override { + if (!traits_type::eq_int_type(ch, traits_type::eof())) + buffer_.push_back(static_cast(ch)); + return ch; + } + + auto xsputn(const char_type* s, streamsize count) -> streamsize override { + buffer_.append(s, s + count); + return count; + } +}; -inline const std::locale& get_classic_locale() { +inline auto get_classic_locale() -> const std::locale& { static const auto& locale = std::locale::classic(); return locale; } @@ -326,8 +368,6 @@ template struct codecvt_result { CodeUnit buf[max_size]; CodeUnit* end; }; -template -constexpr const size_t codecvt_result::max_size; template void write_codecvt(codecvt_result& out, string_view in_buf, @@ -351,11 +391,12 @@ void write_codecvt(codecvt_result& out, string_view in_buf, template auto write_encoded_tm_str(OutputIt out, string_view in, const std::locale& loc) -> OutputIt { - if (detail::is_utf8() && loc != get_classic_locale()) { + if (detail::use_utf8() && loc != get_classic_locale()) { // char16_t and char32_t codecvts are broken in MSVC (linkage errors) and // gcc-4. -#if FMT_MSC_VERSION != 0 || \ - (defined(__GLIBCXX__) && !defined(_GLIBCXX_USE_DUAL_ABI)) +#if FMT_MSC_VERSION != 0 || \ + (defined(__GLIBCXX__) && \ + (!defined(_GLIBCXX_USE_DUAL_ABI) || _GLIBCXX_USE_DUAL_ABI == 0)) // The _GLIBCXX_USE_DUAL_ABI macro is always defined in libstdc++ from gcc-5 // and newer. using code_unit = wchar_t; @@ -367,39 +408,13 @@ auto write_encoded_tm_str(OutputIt out, string_view in, const std::locale& loc) unit_t unit; write_codecvt(unit, in, loc); // In UTF-8 is used one to four one-byte code units. 
- auto&& buf = basic_memory_buffer(); - for (code_unit* p = unit.buf; p != unit.end; ++p) { - uint32_t c = static_cast(*p); - if (sizeof(code_unit) == 2 && c >= 0xd800 && c <= 0xdfff) { - // surrogate pair - ++p; - if (p == unit.end || (c & 0xfc00) != 0xd800 || - (*p & 0xfc00) != 0xdc00) { - FMT_THROW(format_error("failed to format time")); - } - c = (c << 10) + static_cast(*p) - 0x35fdc00; - } - if (c < 0x80) { - buf.push_back(static_cast(c)); - } else if (c < 0x800) { - buf.push_back(static_cast(0xc0 | (c >> 6))); - buf.push_back(static_cast(0x80 | (c & 0x3f))); - } else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) { - buf.push_back(static_cast(0xe0 | (c >> 12))); - buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); - buf.push_back(static_cast(0x80 | (c & 0x3f))); - } else if (c >= 0x10000 && c <= 0x10ffff) { - buf.push_back(static_cast(0xf0 | (c >> 18))); - buf.push_back(static_cast(0x80 | ((c & 0x3ffff) >> 12))); - buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); - buf.push_back(static_cast(0x80 | (c & 0x3f))); - } else { - FMT_THROW(format_error("failed to format time")); - } - } - return copy_str(buf.data(), buf.data() + buf.size(), out); + auto u = + to_utf8>(); + if (!u.convert({unit.buf, to_unsigned(unit.end - unit.buf)})) + FMT_THROW(format_error("failed to format time")); + return copy(u.c_str(), u.c_str() + u.size(), out); } - return copy_str(in.data(), in.data() + in.size(), out); + return copy(in.data(), in.data() + in.size(), out); } template OutputIt { codecvt_result unit; write_codecvt(unit, sv, loc); - return copy_str(unit.buf, unit.end, out); + return copy(unit.buf, unit.end, out); } template & buf, const std::tm& time, auto&& format_buf = formatbuf>(buf); auto&& os = std::basic_ostream(&format_buf); os.imbue(loc); - using iterator = std::ostreambuf_iterator; - const auto& facet = std::use_facet>(loc); + const auto& facet = std::use_facet>(loc); auto end = facet.put(os, os, Char(' '), &time, format, modifier); if 
(end.failed()) FMT_THROW(format_error("failed to format time")); } @@ -448,38 +462,83 @@ auto write(OutputIt out, const std::tm& time, const std::locale& loc, return write_encoded_tm_str(out, string_view(buf.data(), buf.size()), loc); } +template +struct is_same_arithmetic_type + : public std::integral_constant::value && + std::is_integral::value) || + (std::is_floating_point::value && + std::is_floating_point::value)> { +}; + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { +#if FMT_SAFE_DURATION_CAST + // Throwing version of safe_duration_cast is only available for + // integer to integer or float to float casts. + int ec; + To to = safe_duration_cast::safe_duration_cast(from, ec); + if (ec) FMT_THROW(format_error("cannot format duration")); + return to; +#else + // Standard duration cast, may overflow. + return std::chrono::duration_cast(from); +#endif +} + +template < + typename To, typename FromRep, typename FromPeriod, + FMT_ENABLE_IF(!is_same_arithmetic_type::value)> +auto fmt_duration_cast(std::chrono::duration from) -> To { + // Mixed integer <-> float cast is not supported by safe_duration_cast. + return std::chrono::duration_cast(from); +} + +template +auto to_time_t( + std::chrono::time_point time_point) + -> std::time_t { + // Cannot use std::chrono::system_clock::to_time_t since this would first + // require a cast to std::chrono::system_clock::time_point, which could + // overflow. + return fmt_duration_cast>( + time_point.time_since_epoch()) + .count(); +} } // namespace detail -FMT_MODULE_EXPORT_BEGIN +FMT_BEGIN_EXPORT /** - Converts given time since epoch as ``std::time_t`` value into calendar time, - expressed in local time. Unlike ``std::localtime``, this function is - thread-safe on most platforms. + * Converts given time since epoch as `std::time_t` value into calendar time, + * expressed in local time. 
Unlike `std::localtime`, this function is + * thread-safe on most platforms. */ -inline std::tm localtime(std::time_t time) { +inline auto localtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(localtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(localtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { using namespace fmt::detail; std::tm* tm = std::localtime(&time_); if (tm) tm_ = *tm; @@ -493,57 +552,62 @@ inline std::tm localtime(std::time_t time) { return lt.tm_; } -inline std::tm localtime( - std::chrono::time_point time_point) { - return localtime(std::chrono::system_clock::to_time_t(time_point)); +#if FMT_USE_LOCAL_TIME +template +inline auto localtime(std::chrono::local_time time) -> std::tm { + return localtime( + detail::to_time_t(std::chrono::current_zone()->to_sys(time))); } +#endif /** - Converts given time since epoch as ``std::time_t`` value into calendar time, - expressed in Coordinated Universal Time (UTC). Unlike ``std::gmtime``, this - function is thread-safe on most platforms. + * Converts given time since epoch as `std::time_t` value into calendar time, + * expressed in Coordinated Universal Time (UTC). Unlike `std::gmtime`, this + * function is thread-safe on most platforms. 
*/ -inline std::tm gmtime(std::time_t time) { +inline auto gmtime(std::time_t time) -> std::tm { struct dispatcher { std::time_t time_; std::tm tm_; dispatcher(std::time_t t) : time_(t) {} - bool run() { + auto run() -> bool { using namespace fmt::detail; return handle(gmtime_r(&time_, &tm_)); } - bool handle(std::tm* tm) { return tm != nullptr; } + auto handle(std::tm* tm) -> bool { return tm != nullptr; } - bool handle(detail::null<>) { + auto handle(detail::null<>) -> bool { using namespace fmt::detail; return fallback(gmtime_s(&tm_, &time_)); } - bool fallback(int res) { return res == 0; } + auto fallback(int res) -> bool { return res == 0; } #if !FMT_MSC_VERSION - bool fallback(detail::null<>) { + auto fallback(detail::null<>) -> bool { std::tm* tm = std::gmtime(&time_); if (tm) tm_ = *tm; return tm != nullptr; } #endif }; - dispatcher gt(time); + auto gt = dispatcher(time); // Too big time values may be unsupported. if (!gt.run()) FMT_THROW(format_error("time_t value out of range")); return gt.tm_; } -inline std::tm gmtime( - std::chrono::time_point time_point) { - return gmtime(std::chrono::system_clock::to_time_t(time_point)); +template +inline auto gmtime( + std::chrono::time_point time_point) + -> std::tm { + return gmtime(detail::to_time_t(time_point)); } -FMT_BEGIN_DETAIL_NAMESPACE +namespace detail { // Writes two-digit numbers a, b and c separated by sep to buf. 
// The method by Pavel Novikov based on @@ -579,7 +643,8 @@ inline void write_digit2_separated(char* buf, unsigned a, unsigned b, } } -template FMT_CONSTEXPR inline const char* get_units() { +template +FMT_CONSTEXPR inline auto get_units() -> const char* { if (std::is_same::value) return "as"; if (std::is_same::value) return "fs"; if (std::is_same::value) return "ps"; @@ -597,8 +662,9 @@ template FMT_CONSTEXPR inline const char* get_units() { if (std::is_same::value) return "Ts"; if (std::is_same::value) return "Ps"; if (std::is_same::value) return "Es"; - if (std::is_same>::value) return "m"; + if (std::is_same>::value) return "min"; if (std::is_same>::value) return "h"; + if (std::is_same>::value) return "d"; return nullptr; } @@ -608,13 +674,37 @@ enum class numeric_system { alternative }; +// Glibc extensions for formatting numeric values. +enum class pad_type { + // Pad a numeric result string with zeros (the default). + zero, + // Do not pad a numeric result string. + none, + // Pad a numeric result string with spaces. + space, +}; + +template +auto write_padding(OutputIt out, pad_type pad, int width) -> OutputIt { + if (pad == pad_type::none) return out; + return detail::fill_n(out, width, pad == pad_type::space ? ' ' : '0'); +} + +template +auto write_padding(OutputIt out, pad_type pad) -> OutputIt { + if (pad != pad_type::none) *out++ = pad == pad_type::space ? ' ' : '0'; + return out; +} + // Parses a put_time-like format string and invokes handler actions. 
template -FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, - const Char* end, - Handler&& handler) { +FMT_CONSTEXPR auto parse_chrono_format(const Char* begin, const Char* end, + Handler&& handler) -> const Char* { + if (begin == end || *begin == '}') return begin; + if (*begin != '%') FMT_THROW(format_error("invalid format")); auto ptr = begin; while (ptr != end) { + pad_type pad = pad_type::zero; auto c = *ptr; if (c == '}') break; if (c != '%') { @@ -624,6 +714,18 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, if (begin != ptr) handler.on_text(begin, ptr); ++ptr; // consume '%' if (ptr == end) FMT_THROW(format_error("invalid format")); + c = *ptr; + switch (c) { + case '_': + pad = pad_type::space; + ++ptr; + break; + case '-': + pad = pad_type::none; + ++ptr; + break; + } + if (ptr == end) FMT_THROW(format_error("invalid format")); c = *ptr++; switch (c) { case '%': @@ -681,35 +783,35 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, break; // Day of the year/month: case 'U': - handler.on_dec0_week_of_year(numeric_system::standard); + handler.on_dec0_week_of_year(numeric_system::standard, pad); break; case 'W': - handler.on_dec1_week_of_year(numeric_system::standard); + handler.on_dec1_week_of_year(numeric_system::standard, pad); break; case 'V': - handler.on_iso_week_of_year(numeric_system::standard); + handler.on_iso_week_of_year(numeric_system::standard, pad); break; case 'j': handler.on_day_of_year(); break; case 'd': - handler.on_day_of_month(numeric_system::standard); + handler.on_day_of_month(numeric_system::standard, pad); break; case 'e': - handler.on_day_of_month_space(numeric_system::standard); + handler.on_day_of_month(numeric_system::standard, pad_type::space); break; // Hour, minute, second: case 'H': - handler.on_24_hour(numeric_system::standard); + handler.on_24_hour(numeric_system::standard, pad); break; case 'I': - handler.on_12_hour(numeric_system::standard); + 
handler.on_12_hour(numeric_system::standard, pad); break; case 'M': - handler.on_minute(numeric_system::standard); + handler.on_minute(numeric_system::standard, pad); break; case 'S': - handler.on_second(numeric_system::standard); + handler.on_second(numeric_system::standard, pad); break; // Other: case 'c': @@ -746,7 +848,7 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, handler.on_duration_unit(); break; case 'z': - handler.on_utc_offset(); + handler.on_utc_offset(numeric_system::standard); break; case 'Z': handler.on_tz_name(); @@ -774,6 +876,9 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, case 'X': handler.on_loc_time(numeric_system::alternative); break; + case 'z': + handler.on_utc_offset(numeric_system::alternative); + break; default: FMT_THROW(format_error("invalid format")); } @@ -790,19 +895,19 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, handler.on_dec_month(numeric_system::alternative); break; case 'U': - handler.on_dec0_week_of_year(numeric_system::alternative); + handler.on_dec0_week_of_year(numeric_system::alternative, pad); break; case 'W': - handler.on_dec1_week_of_year(numeric_system::alternative); + handler.on_dec1_week_of_year(numeric_system::alternative, pad); break; case 'V': - handler.on_iso_week_of_year(numeric_system::alternative); + handler.on_iso_week_of_year(numeric_system::alternative, pad); break; case 'd': - handler.on_day_of_month(numeric_system::alternative); + handler.on_day_of_month(numeric_system::alternative, pad); break; case 'e': - handler.on_day_of_month_space(numeric_system::alternative); + handler.on_day_of_month(numeric_system::alternative, pad_type::space); break; case 'w': handler.on_dec0_weekday(numeric_system::alternative); @@ -811,16 +916,19 @@ FMT_CONSTEXPR const Char* parse_chrono_format(const Char* begin, handler.on_dec1_weekday(numeric_system::alternative); break; case 'H': - handler.on_24_hour(numeric_system::alternative); + 
handler.on_24_hour(numeric_system::alternative, pad); break; case 'I': - handler.on_12_hour(numeric_system::alternative); + handler.on_12_hour(numeric_system::alternative, pad); break; case 'M': - handler.on_minute(numeric_system::alternative); + handler.on_minute(numeric_system::alternative, pad); break; case 'S': - handler.on_second(numeric_system::alternative); + handler.on_second(numeric_system::alternative, pad); + break; + case 'z': + handler.on_utc_offset(numeric_system::alternative); break; default: FMT_THROW(format_error("invalid format")); @@ -852,12 +960,19 @@ template struct null_chrono_spec_handler { FMT_CONSTEXPR void on_abbr_month() { unsupported(); } FMT_CONSTEXPR void on_full_month() { unsupported(); } FMT_CONSTEXPR void on_dec_month(numeric_system) { unsupported(); } - FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system) { unsupported(); } - FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system) { unsupported(); } - FMT_CONSTEXPR void on_iso_week_of_year(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system, pad_type) { + unsupported(); + } + FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system, pad_type) { + unsupported(); + } + FMT_CONSTEXPR void on_iso_week_of_year(numeric_system, pad_type) { + unsupported(); + } FMT_CONSTEXPR void on_day_of_year() { unsupported(); } - FMT_CONSTEXPR void on_day_of_month(numeric_system) { unsupported(); } - FMT_CONSTEXPR void on_day_of_month_space(numeric_system) { unsupported(); } + FMT_CONSTEXPR void on_day_of_month(numeric_system, pad_type) { + unsupported(); + } FMT_CONSTEXPR void on_24_hour(numeric_system) { unsupported(); } FMT_CONSTEXPR void on_12_hour(numeric_system) { unsupported(); } FMT_CONSTEXPR void on_minute(numeric_system) { unsupported(); } @@ -873,7 +988,7 @@ template struct null_chrono_spec_handler { FMT_CONSTEXPR void on_am_pm() { unsupported(); } FMT_CONSTEXPR void on_duration_value() { unsupported(); } FMT_CONSTEXPR void on_duration_unit() { 
unsupported(); } - FMT_CONSTEXPR void on_utc_offset() { unsupported(); } + FMT_CONSTEXPR void on_utc_offset(numeric_system) { unsupported(); } FMT_CONSTEXPR void on_tz_name() { unsupported(); } }; @@ -895,16 +1010,15 @@ struct tm_format_checker : null_chrono_spec_handler { FMT_CONSTEXPR void on_abbr_month() {} FMT_CONSTEXPR void on_full_month() {} FMT_CONSTEXPR void on_dec_month(numeric_system) {} - FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system) {} - FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system) {} - FMT_CONSTEXPR void on_iso_week_of_year(numeric_system) {} + FMT_CONSTEXPR void on_dec0_week_of_year(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_dec1_week_of_year(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_iso_week_of_year(numeric_system, pad_type) {} FMT_CONSTEXPR void on_day_of_year() {} - FMT_CONSTEXPR void on_day_of_month(numeric_system) {} - FMT_CONSTEXPR void on_day_of_month_space(numeric_system) {} - FMT_CONSTEXPR void on_24_hour(numeric_system) {} - FMT_CONSTEXPR void on_12_hour(numeric_system) {} - FMT_CONSTEXPR void on_minute(numeric_system) {} - FMT_CONSTEXPR void on_second(numeric_system) {} + FMT_CONSTEXPR void on_day_of_month(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_24_hour(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_12_hour(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_minute(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_second(numeric_system, pad_type) {} FMT_CONSTEXPR void on_datetime(numeric_system) {} FMT_CONSTEXPR void on_loc_date(numeric_system) {} FMT_CONSTEXPR void on_loc_time(numeric_system) {} @@ -914,29 +1028,29 @@ struct tm_format_checker : null_chrono_spec_handler { FMT_CONSTEXPR void on_24_hour_time() {} FMT_CONSTEXPR void on_iso_time() {} FMT_CONSTEXPR void on_am_pm() {} - FMT_CONSTEXPR void on_utc_offset() {} + FMT_CONSTEXPR void on_utc_offset(numeric_system) {} FMT_CONSTEXPR void on_tz_name() {} }; -inline const char* tm_wday_full_name(int wday) { +inline auto 
tm_wday_full_name(int wday) -> const char* { static constexpr const char* full_name_list[] = { "Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}; return wday >= 0 && wday <= 6 ? full_name_list[wday] : "?"; } -inline const char* tm_wday_short_name(int wday) { +inline auto tm_wday_short_name(int wday) -> const char* { static constexpr const char* short_name_list[] = {"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"}; return wday >= 0 && wday <= 6 ? short_name_list[wday] : "???"; } -inline const char* tm_mon_full_name(int mon) { +inline auto tm_mon_full_name(int mon) -> const char* { static constexpr const char* full_name_list[] = { "January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"}; return mon >= 0 && mon <= 11 ? full_name_list[mon] : "?"; } -inline const char* tm_mon_short_name(int mon) { +inline auto tm_mon_short_name(int mon) -> const char* { static constexpr const char* short_name_list[] = { "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", @@ -966,13 +1080,135 @@ inline void tzset_once() { } #endif -template class tm_writer { +// Converts value to Int and checks that it's in the range [0, upper). +template ::value)> +inline auto to_nonnegative_int(T value, Int upper) -> Int { + if (!std::is_unsigned::value && + (value < 0 || to_unsigned(value) > to_unsigned(upper))) { + FMT_THROW(fmt::format_error("chrono value is out of range")); + } + return static_cast(value); +} +template ::value)> +inline auto to_nonnegative_int(T value, Int upper) -> Int { + auto int_value = static_cast(value); + if (int_value < 0 || value > static_cast(upper)) + FMT_THROW(format_error("invalid value")); + return int_value; +} + +constexpr auto pow10(std::uint32_t n) -> long long { + return n == 0 ? 1 : 10 * pow10(n - 1); +} + +// Counts the number of fractional digits in the range [0, 18] according to the +// C++20 spec. 
If more than 18 fractional digits are required then returns 6 for +// microseconds precision. +template () / 10)> +struct count_fractional_digits { + static constexpr int value = + Num % Den == 0 ? N : count_fractional_digits::value; +}; + +// Base case that doesn't instantiate any more templates +// in order to avoid overflow. +template +struct count_fractional_digits { + static constexpr int value = (Num % Den == 0) ? N : 6; +}; + +// Format subseconds which are given as an integer type with an appropriate +// number of digits. +template +void write_fractional_seconds(OutputIt& out, Duration d, int precision = -1) { + constexpr auto num_fractional_digits = + count_fractional_digits::value; + + using subsecond_precision = std::chrono::duration< + typename std::common_type::type, + std::ratio<1, detail::pow10(num_fractional_digits)>>; + + const auto fractional = d - fmt_duration_cast(d); + const auto subseconds = + std::chrono::treat_as_floating_point< + typename subsecond_precision::rep>::value + ? 
fractional.count() + : fmt_duration_cast(fractional).count(); + auto n = static_cast>(subseconds); + const int num_digits = detail::count_digits(n); + + int leading_zeroes = (std::max)(0, num_fractional_digits - num_digits); + if (precision < 0) { + FMT_ASSERT(!std::is_floating_point::value, ""); + if (std::ratio_less::value) { + *out++ = '.'; + out = detail::fill_n(out, leading_zeroes, '0'); + out = format_decimal(out, n, num_digits).end; + } + } else if (precision > 0) { + *out++ = '.'; + leading_zeroes = (std::min)(leading_zeroes, precision); + int remaining = precision - leading_zeroes; + out = detail::fill_n(out, leading_zeroes, '0'); + if (remaining < num_digits) { + int num_truncated_digits = num_digits - remaining; + n /= to_unsigned(detail::pow10(to_unsigned(num_truncated_digits))); + if (n) { + out = format_decimal(out, n, remaining).end; + } + return; + } + if (n) { + out = format_decimal(out, n, num_digits).end; + remaining -= num_digits; + } + out = detail::fill_n(out, remaining, '0'); + } +} + +// Format subseconds which are given as a floating point type with an +// appropriate number of digits. We cannot pass the Duration here, as we +// explicitly need to pass the Rep value in the chrono_formatter. +template +void write_floating_seconds(memory_buffer& buf, Duration duration, + int num_fractional_digits = -1) { + using rep = typename Duration::rep; + FMT_ASSERT(std::is_floating_point::value, ""); + + auto val = duration.count(); + + if (num_fractional_digits < 0) { + // For `std::round` with fallback to `round`: + // On some toolchains `std::round` is not available (e.g. GCC 6). 
+ using namespace std; + num_fractional_digits = + count_fractional_digits::value; + if (num_fractional_digits < 6 && static_cast(round(val)) != val) + num_fractional_digits = 6; + } + + fmt::format_to(std::back_inserter(buf), FMT_STRING("{:.{}f}"), + std::fmod(val * static_cast(Duration::period::num) / + static_cast(Duration::period::den), + static_cast(60)), + num_fractional_digits); +} + +template +class tm_writer { private: static constexpr int days_per_week = 7; const std::locale& loc_; const bool is_classic_; OutputIt out_; + const Duration* subsecs_; const std::tm& tm_; auto tm_sec() const noexcept -> int { @@ -1021,8 +1257,7 @@ template class tm_writer { return static_cast(l); } - // Algorithm: - // https://en.wikipedia.org/wiki/ISO_week_date#Calculating_the_week_number_from_a_month_and_day_of_the_month_or_ordinal_date + // Algorithm: https://en.wikipedia.org/wiki/ISO_week_date. auto iso_year_weeks(long long curr_year) const noexcept -> int { const auto prev_year = curr_year - 1; const auto curr_p = @@ -1060,6 +1295,17 @@ template class tm_writer { *out_++ = *d++; *out_++ = *d; } + void write2(int value, pad_type pad) { + unsigned int v = to_unsigned(value) % 100; + if (v >= 10) { + const char* d = digits2(v); + *out_++ = *d++; + *out_++ = *d; + } else { + out_ = detail::write_padding(out_, pad); + *out_++ = static_cast('0' + v); + } + } void write_year_extended(long long year) { // At least 4 characters. 
@@ -1071,7 +1317,8 @@ template class tm_writer { } uint32_or_64_or_128_t n = to_unsigned(year); const int num_digits = count_digits(n); - if (width > num_digits) out_ = std::fill_n(out_, width - num_digits, '0'); + if (width > num_digits) + out_ = detail::fill_n(out_, width - num_digits, '0'); out_ = format_decimal(out_, n, num_digits).end; } void write_year(long long year) { @@ -1083,7 +1330,7 @@ template class tm_writer { } } - void write_utc_offset(long offset) { + void write_utc_offset(long offset, numeric_system ns) { if (offset < 0) { *out_++ = '-'; offset = -offset; @@ -1092,14 +1339,15 @@ template class tm_writer { } offset /= 60; write2(static_cast(offset / 60)); + if (ns != numeric_system::standard) *out_++ = ':'; write2(static_cast(offset % 60)); } template ::value)> - void format_utc_offset_impl(const T& tm) { - write_utc_offset(tm.tm_gmtoff); + void format_utc_offset_impl(const T& tm, numeric_system ns) { + write_utc_offset(tm.tm_gmtoff, ns); } template ::value)> - void format_utc_offset_impl(const T& tm) { + void format_utc_offset_impl(const T& tm, numeric_system ns) { #if defined(_WIN32) && defined(_UCRT) # if FMT_USE_TZSET tzset_once(); @@ -1111,10 +1359,17 @@ template class tm_writer { _get_dstbias(&dstbias); offset += dstbias; } - write_utc_offset(-offset); + write_utc_offset(-offset, ns); #else - ignore_unused(tm); - format_localized('z'); + if (ns == numeric_system::standard) return format_localized('z'); + + // Extract timezone offset from timezone conversion functions. 
+ std::tm gtm = tm; + std::time_t gt = std::mktime(>m); + std::tm ltm = gmtime(gt); + std::time_t lt = std::mktime(<m); + long offset = gt - lt; + write_utc_offset(offset, ns); #endif } @@ -1135,16 +1390,18 @@ template class tm_writer { } public: - tm_writer(const std::locale& loc, OutputIt out, const std::tm& tm) + tm_writer(const std::locale& loc, OutputIt out, const std::tm& tm, + const Duration* subsecs = nullptr) : loc_(loc), is_classic_(loc_ == get_classic_locale()), out_(out), + subsecs_(subsecs), tm_(tm) {} - OutputIt out() const { return out_; } + auto out() const -> OutputIt { return out_; } FMT_CONSTEXPR void on_text(const Char* begin, const Char* end) { - out_ = copy_str(begin, end, out_); + out_ = copy(begin, end, out_); } void on_abbr_weekday() { @@ -1191,7 +1448,7 @@ template class tm_writer { *out_++ = ' '; on_abbr_month(); *out_++ = ' '; - on_day_of_month_space(numeric_system::standard); + on_day_of_month(numeric_system::standard, pad_type::space); *out_++ = ' '; on_iso_time(); *out_++ = ' '; @@ -1217,7 +1474,7 @@ template class tm_writer { write_digit2_separated(buf, to_unsigned(tm_mon() + 1), to_unsigned(tm_mday()), to_unsigned(split_year_lower(tm_year())), '/'); - out_ = copy_str(std::begin(buf), std::end(buf), out_); + out_ = copy(std::begin(buf), std::end(buf), out_); } void on_iso_date() { auto year = tm_year(); @@ -1233,10 +1490,10 @@ template class tm_writer { write_digit2_separated(buf + 2, static_cast(year % 100), to_unsigned(tm_mon() + 1), to_unsigned(tm_mday()), '-'); - out_ = copy_str(std::begin(buf) + offset, std::end(buf), out_); + out_ = copy(std::begin(buf) + offset, std::end(buf), out_); } - void on_utc_offset() { format_utc_offset_impl(tm_); } + void on_utc_offset(numeric_system ns) { format_utc_offset_impl(tm_, ns); } void on_tz_name() { format_tz_name_impl(tm_); } void on_year(numeric_system ns) { @@ -1278,24 +1535,26 @@ template class tm_writer { format_localized('m', 'O'); } - void on_dec0_week_of_year(numeric_system ns) { + 
void on_dec0_week_of_year(numeric_system ns, pad_type pad) { if (is_classic_ || ns == numeric_system::standard) - return write2((tm_yday() + days_per_week - tm_wday()) / days_per_week); + return write2((tm_yday() + days_per_week - tm_wday()) / days_per_week, + pad); format_localized('U', 'O'); } - void on_dec1_week_of_year(numeric_system ns) { + void on_dec1_week_of_year(numeric_system ns, pad_type pad) { if (is_classic_ || ns == numeric_system::standard) { auto wday = tm_wday(); write2((tm_yday() + days_per_week - (wday == 0 ? (days_per_week - 1) : (wday - 1))) / - days_per_week); + days_per_week, + pad); } else { format_localized('W', 'O'); } } - void on_iso_week_of_year(numeric_system ns) { + void on_iso_week_of_year(numeric_system ns, pad_type pad) { if (is_classic_ || ns == numeric_system::standard) - return write2(tm_iso_week_of_year()); + return write2(tm_iso_week_of_year(), pad); format_localized('V', 'O'); } @@ -1309,37 +1568,47 @@ template class tm_writer { write1(yday / 100); write2(yday % 100); } - void on_day_of_month(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) return write2(tm_mday()); + void on_day_of_month(numeric_system ns, pad_type pad) { + if (is_classic_ || ns == numeric_system::standard) + return write2(tm_mday(), pad); format_localized('d', 'O'); } - void on_day_of_month_space(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) { - auto mday = to_unsigned(tm_mday()) % 100; - const char* d2 = digits2(mday); - *out_++ = mday < 10 ? 
' ' : d2[0]; - *out_++ = d2[1]; - } else { - format_localized('e', 'O'); - } - } - void on_24_hour(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) return write2(tm_hour()); + void on_24_hour(numeric_system ns, pad_type pad) { + if (is_classic_ || ns == numeric_system::standard) + return write2(tm_hour(), pad); format_localized('H', 'O'); } - void on_12_hour(numeric_system ns) { + void on_12_hour(numeric_system ns, pad_type pad) { if (is_classic_ || ns == numeric_system::standard) - return write2(tm_hour12()); + return write2(tm_hour12(), pad); format_localized('I', 'O'); } - void on_minute(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) return write2(tm_min()); + void on_minute(numeric_system ns, pad_type pad) { + if (is_classic_ || ns == numeric_system::standard) + return write2(tm_min(), pad); format_localized('M', 'O'); } - void on_second(numeric_system ns) { - if (is_classic_ || ns == numeric_system::standard) return write2(tm_sec()); - format_localized('S', 'O'); + + void on_second(numeric_system ns, pad_type pad) { + if (is_classic_ || ns == numeric_system::standard) { + write2(tm_sec(), pad); + if (subsecs_) { + if (std::is_floating_point::value) { + auto buf = memory_buffer(); + write_floating_seconds(buf, *subsecs_); + if (buf.size() > 1) { + // Remove the leading "0", write something like ".123". + out_ = std::copy(buf.begin() + 1, buf.end(), out_); + } + } else { + write_fractional_seconds(out_, *subsecs_); + } + } + } else { + // Currently no formatting of subseconds when a locale is set. 
+ format_localized('S', 'O'); + } } void on_12_hour_time() { @@ -1347,7 +1616,7 @@ template class tm_writer { char buf[8]; write_digit2_separated(buf, to_unsigned(tm_hour12()), to_unsigned(tm_min()), to_unsigned(tm_sec()), ':'); - out_ = copy_str(std::begin(buf), std::end(buf), out_); + out_ = copy(std::begin(buf), std::end(buf), out_); *out_++ = ' '; on_am_pm(); } else { @@ -1360,10 +1629,9 @@ template class tm_writer { write2(tm_min()); } void on_iso_time() { - char buf[8]; - write_digit2_separated(buf, to_unsigned(tm_hour()), to_unsigned(tm_min()), - to_unsigned(tm_sec()), ':'); - out_ = copy_str(std::begin(buf), std::end(buf), out_); + on_24_hour_time(); + *out_++ = ':'; + on_second(numeric_system::standard, pad_type::zero); } void on_am_pm() { @@ -1381,49 +1649,41 @@ template class tm_writer { }; struct chrono_format_checker : null_chrono_spec_handler { + bool has_precision_integral = false; + FMT_NORETURN void unsupported() { FMT_THROW(format_error("no date")); } template FMT_CONSTEXPR void on_text(const Char*, const Char*) {} - FMT_CONSTEXPR void on_24_hour(numeric_system) {} - FMT_CONSTEXPR void on_12_hour(numeric_system) {} - FMT_CONSTEXPR void on_minute(numeric_system) {} - FMT_CONSTEXPR void on_second(numeric_system) {} + FMT_CONSTEXPR void on_day_of_year() {} + FMT_CONSTEXPR void on_24_hour(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_12_hour(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_minute(numeric_system, pad_type) {} + FMT_CONSTEXPR void on_second(numeric_system, pad_type) {} FMT_CONSTEXPR void on_12_hour_time() {} FMT_CONSTEXPR void on_24_hour_time() {} FMT_CONSTEXPR void on_iso_time() {} FMT_CONSTEXPR void on_am_pm() {} - FMT_CONSTEXPR void on_duration_value() {} + FMT_CONSTEXPR void on_duration_value() const { + if (has_precision_integral) { + FMT_THROW(format_error("precision not allowed for this argument type")); + } + } FMT_CONSTEXPR void on_duration_unit() {} }; -template ::value)> -inline bool isfinite(T) { +template 
::value&& has_isfinite::value)> +inline auto isfinite(T) -> bool { return true; } -// Converts value to Int and checks that it's in the range [0, upper). -template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { - FMT_ASSERT(std::is_unsigned::value || - (value >= 0 && to_unsigned(value) <= to_unsigned(upper)), - "invalid value"); - (void)upper; - return static_cast(value); -} -template ::value)> -inline Int to_nonnegative_int(T value, Int upper) { - if (value < 0 || value > static_cast(upper)) - FMT_THROW(format_error("invalid value")); - return static_cast(value); -} - template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return x % static_cast(y); } template ::value)> -inline T mod(T x, int y) { +inline auto mod(T x, int y) -> T { return std::fmod(x, static_cast(y)); } @@ -1438,104 +1698,52 @@ template struct make_unsigned_or_unchanged { using type = typename std::make_unsigned::type; }; -#if FMT_SAFE_DURATION_CAST -// throwing version of safe_duration_cast -template -To fmt_safe_duration_cast(std::chrono::duration from) { - int ec; - To to = safe_duration_cast::safe_duration_cast(from, ec); - if (ec) FMT_THROW(format_error("cannot format duration")); - return to; -} -#endif - template ::value)> -inline std::chrono::duration get_milliseconds( - std::chrono::duration d) { +inline auto get_milliseconds(std::chrono::duration d) + -> std::chrono::duration { // this may overflow and/or the result may not fit in the // target type. 
#if FMT_SAFE_DURATION_CAST using CommonSecondsType = typename std::common_type::type; - const auto d_as_common = fmt_safe_duration_cast(d); + const auto d_as_common = fmt_duration_cast(d); const auto d_as_whole_seconds = - fmt_safe_duration_cast(d_as_common); + fmt_duration_cast(d_as_common); // this conversion should be nonproblematic const auto diff = d_as_common - d_as_whole_seconds; const auto ms = - fmt_safe_duration_cast>(diff); + fmt_duration_cast>(diff); return ms; #else - auto s = std::chrono::duration_cast(d); - return std::chrono::duration_cast(d - s); + auto s = fmt_duration_cast(d); + return fmt_duration_cast(d - s); #endif } -// Counts the number of fractional digits in the range [0, 18] according to the -// C++20 spec. If more than 18 fractional digits are required then returns 6 for -// microseconds precision. -template () / 10)> -struct count_fractional_digits { - static constexpr int value = - Num % Den == 0 ? N : count_fractional_digits::value; -}; - -// Base case that doesn't instantiate any more templates -// in order to avoid overflow. -template -struct count_fractional_digits { - static constexpr int value = (Num % Den == 0) ? N : 6; -}; - -constexpr long long pow10(std::uint32_t n) { - return n == 0 ? 1 : 10 * pow10(n - 1); -} - -template ::is_signed)> -constexpr std::chrono::duration abs( - std::chrono::duration d) { - // We need to compare the duration using the count() method directly - // due to a compiler bug in clang-11 regarding the spaceship operator, - // when -Wzero-as-null-pointer-constant is enabled. - // In clang-12 the bug has been fixed. See - // https://bugs.llvm.org/show_bug.cgi?id=46235 and the reproducible example: - // https://www.godbolt.org/z/Knbb5joYx. - return d.count() >= d.zero().count() ? 
d : -d; -} - -template ::is_signed)> -constexpr std::chrono::duration abs( - std::chrono::duration d) { - return d; -} - template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int) { +auto format_duration_value(OutputIt out, Rep val, int) -> OutputIt { return write(out, val); } template ::value)> -OutputIt format_duration_value(OutputIt out, Rep val, int precision) { - auto specs = basic_format_specs(); +auto format_duration_value(OutputIt out, Rep val, int precision) -> OutputIt { + auto specs = format_specs(); specs.precision = precision; - specs.type = precision >= 0 ? presentation_type::fixed_lower - : presentation_type::general_lower; + specs.type = + precision >= 0 ? presentation_type::fixed : presentation_type::general; return write(out, val, specs); } template -OutputIt copy_unit(string_view unit, OutputIt out, Char) { +auto copy_unit(string_view unit, OutputIt out, Char) -> OutputIt { return std::copy(unit.begin(), unit.end(), out); } template -OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { +auto copy_unit(string_view unit, OutputIt out, wchar_t) -> OutputIt { // This works when wchar_t is UTF-32 because units only contain characters // that have the same representation in UTF-16 and UTF-32. utf8_to_utf16 u(unit); @@ -1543,7 +1751,7 @@ OutputIt copy_unit(string_view unit, OutputIt out, wchar_t) { } template -OutputIt format_duration_unit(OutputIt out) { +auto format_duration_unit(OutputIt out) -> OutputIt { if (const char* unit = get_units()) return copy_unit(string_view(unit), out, Char()); *out++ = '['; @@ -1566,8 +1774,10 @@ class get_locale { public: get_locale(bool localized, locale_ref loc) : has_locale_(localized) { +#ifndef FMT_STATIC_THOUSANDS_SEPARATOR if (localized) ::new (&locale_) std::locale(loc.template get()); +#endif } ~get_locale() { if (has_locale_) locale_.~locale(); @@ -1610,18 +1820,12 @@ struct chrono_formatter { // this may overflow and/or the result may not fit in the // target type. 
-#if FMT_SAFE_DURATION_CAST // might need checked conversion (rep!=Rep) - auto tmpval = std::chrono::duration(val); - s = fmt_safe_duration_cast(tmpval); -#else - s = std::chrono::duration_cast( - std::chrono::duration(val)); -#endif + s = fmt_duration_cast(std::chrono::duration(val)); } // returns true if nan or inf, writes to out. - bool handle_nan_inf() { + auto handle_nan_inf() -> bool { if (isfinite(val)) { return false; } @@ -1638,17 +1842,22 @@ struct chrono_formatter { return true; } - Rep hour() const { return static_cast(mod((s.count() / 3600), 24)); } + auto days() const -> Rep { return static_cast(s.count() / 86400); } + auto hour() const -> Rep { + return static_cast(mod((s.count() / 3600), 24)); + } - Rep hour12() const { + auto hour12() const -> Rep { Rep hour = static_cast(mod((s.count() / 3600), 12)); return hour <= 0 ? 12 : hour; } - Rep minute() const { return static_cast(mod((s.count() / 60), 60)); } - Rep second() const { return static_cast(mod(s.count(), 60)); } + auto minute() const -> Rep { + return static_cast(mod((s.count() / 60), 60)); + } + auto second() const -> Rep { return static_cast(mod(s.count(), 60)); } - std::tm time() const { + auto time() const -> std::tm { auto time = std::tm(); time.tm_hour = to_nonnegative_int(hour(), 24); time.tm_min = to_nonnegative_int(minute(), 60); @@ -1663,44 +1872,16 @@ struct chrono_formatter { } } - void write(Rep value, int width) { + void write(Rep value, int width, pad_type pad = pad_type::zero) { write_sign(); if (isnan(value)) return write_nan(); uint32_or_64_or_128_t n = to_unsigned(to_nonnegative_int(value, max_value())); int num_digits = detail::count_digits(n); - if (width > num_digits) out = std::fill_n(out, width - num_digits, '0'); - out = format_decimal(out, n, num_digits).end; - } - - template void write_fractional_seconds(Duration d) { - FMT_ASSERT(!std::is_floating_point::value, ""); - constexpr auto num_fractional_digits = - count_fractional_digits::value; - - using 
subsecond_precision = std::chrono::duration< - typename std::common_type::type, - std::ratio<1, detail::pow10(num_fractional_digits)>>; - if (std::ratio_less::value) { - *out++ = '.'; - auto fractional = - detail::abs(d) - std::chrono::duration_cast(d); - auto subseconds = - std::chrono::treat_as_floating_point< - typename subsecond_precision::rep>::value - ? fractional.count() - : std::chrono::duration_cast(fractional) - .count(); - uint32_or_64_or_128_t n = - to_unsigned(to_nonnegative_int(subseconds, max_value())); - int num_digits = detail::count_digits(n); - if (num_fractional_digits > num_digits) - out = std::fill_n(out, num_fractional_digits - num_digits, '0'); - out = format_decimal(out, n, num_digits).end; + if (width > num_digits) { + out = detail::write_padding(out, pad, width - num_digits); } + out = format_decimal(out, n, num_digits).end; } void write_nan() { std::copy_n("nan", 3, out); } @@ -1732,7 +1913,7 @@ struct chrono_formatter { void on_loc_time(numeric_system) {} void on_us_date() {} void on_iso_date() {} - void on_utc_offset() {} + void on_utc_offset(numeric_system) {} void on_tz_name() {} void on_year(numeric_system) {} void on_short_year(numeric_system) {} @@ -1741,65 +1922,66 @@ struct chrono_formatter { void on_iso_week_based_year() {} void on_iso_week_based_short_year() {} void on_dec_month(numeric_system) {} - void on_dec0_week_of_year(numeric_system) {} - void on_dec1_week_of_year(numeric_system) {} - void on_iso_week_of_year(numeric_system) {} - void on_day_of_year() {} - void on_day_of_month(numeric_system) {} - void on_day_of_month_space(numeric_system) {} - - void on_24_hour(numeric_system ns) { + void on_dec0_week_of_year(numeric_system, pad_type) {} + void on_dec1_week_of_year(numeric_system, pad_type) {} + void on_iso_week_of_year(numeric_system, pad_type) {} + void on_day_of_month(numeric_system, pad_type) {} + + void on_day_of_year() { if (handle_nan_inf()) return; + write(days(), 0); + } - if (ns == numeric_system::standard) 
return write(hour(), 2); + void on_24_hour(numeric_system ns, pad_type pad) { + if (handle_nan_inf()) return; + + if (ns == numeric_system::standard) return write(hour(), 2, pad); auto time = tm(); time.tm_hour = to_nonnegative_int(hour(), 24); - format_tm(time, &tm_writer_type::on_24_hour, ns); + format_tm(time, &tm_writer_type::on_24_hour, ns, pad); } - void on_12_hour(numeric_system ns) { + void on_12_hour(numeric_system ns, pad_type pad) { if (handle_nan_inf()) return; - if (ns == numeric_system::standard) return write(hour12(), 2); + if (ns == numeric_system::standard) return write(hour12(), 2, pad); auto time = tm(); time.tm_hour = to_nonnegative_int(hour12(), 12); - format_tm(time, &tm_writer_type::on_12_hour, ns); + format_tm(time, &tm_writer_type::on_12_hour, ns, pad); } - void on_minute(numeric_system ns) { + void on_minute(numeric_system ns, pad_type pad) { if (handle_nan_inf()) return; - if (ns == numeric_system::standard) return write(minute(), 2); + if (ns == numeric_system::standard) return write(minute(), 2, pad); auto time = tm(); time.tm_min = to_nonnegative_int(minute(), 60); - format_tm(time, &tm_writer_type::on_minute, ns); + format_tm(time, &tm_writer_type::on_minute, ns, pad); } - void on_second(numeric_system ns) { + void on_second(numeric_system ns, pad_type pad) { if (handle_nan_inf()) return; if (ns == numeric_system::standard) { if (std::is_floating_point::value) { - constexpr auto num_fractional_digits = - count_fractional_digits::value; auto buf = memory_buffer(); - format_to(std::back_inserter(buf), runtime("{:.{}f}"), - std::fmod(val * static_cast(Period::num) / - static_cast(Period::den), - static_cast(60)), - num_fractional_digits); + write_floating_seconds(buf, std::chrono::duration(val), + precision); if (negative) *out++ = '-'; - if (buf.size() < 2 || buf[1] == '.') *out++ = '0'; + if (buf.size() < 2 || buf[1] == '.') { + out = detail::write_padding(out, pad); + } out = std::copy(buf.begin(), buf.end(), out); } else { - 
write(second(), 2); - write_fractional_seconds(std::chrono::duration(val)); + write(second(), 2, pad); + write_fractional_seconds( + out, std::chrono::duration(val), precision); } return; } auto time = tm(); time.tm_sec = to_nonnegative_int(second(), 60); - format_tm(time, &tm_writer_type::on_second, ns); + format_tm(time, &tm_writer_type::on_second, ns, pad); } void on_12_hour_time() { @@ -1823,7 +2005,7 @@ struct chrono_formatter { on_24_hour_time(); *out++ = ':'; if (handle_nan_inf()) return; - on_second(numeric_system::standard); + on_second(numeric_system::standard, pad_type::zero); } void on_am_pm() { @@ -1842,168 +2024,279 @@ struct chrono_formatter { } }; -FMT_END_DETAIL_NAMESPACE +} // namespace detail #if defined(__cpp_lib_chrono) && __cpp_lib_chrono >= 201907 using weekday = std::chrono::weekday; +using day = std::chrono::day; +using month = std::chrono::month; +using year = std::chrono::year; +using year_month_day = std::chrono::year_month_day; #else // A fallback version of weekday. class weekday { private: - unsigned char value; + unsigned char value_; public: weekday() = default; - explicit constexpr weekday(unsigned wd) noexcept - : value(static_cast(wd != 7 ? wd : 0)) {} - constexpr unsigned c_encoding() const noexcept { return value; } + constexpr explicit weekday(unsigned wd) noexcept + : value_(static_cast(wd != 7 ? 
wd : 0)) {} + constexpr auto c_encoding() const noexcept -> unsigned { return value_; } }; -class year_month_day {}; +class day { + private: + unsigned char value_; + + public: + day() = default; + constexpr explicit day(unsigned d) noexcept + : value_(static_cast(d)) {} + constexpr explicit operator unsigned() const noexcept { return value_; } +}; + +class month { + private: + unsigned char value_; + + public: + month() = default; + constexpr explicit month(unsigned m) noexcept + : value_(static_cast(m)) {} + constexpr explicit operator unsigned() const noexcept { return value_; } +}; + +class year { + private: + int value_; + + public: + year() = default; + constexpr explicit year(int y) noexcept : value_(y) {} + constexpr explicit operator int() const noexcept { return value_; } +}; + +class year_month_day { + private: + fmt::year year_; + fmt::month month_; + fmt::day day_; + + public: + year_month_day() = default; + constexpr year_month_day(const year& y, const month& m, const day& d) noexcept + : year_(y), month_(m), day_(d) {} + constexpr auto year() const noexcept -> fmt::year { return year_; } + constexpr auto month() const noexcept -> fmt::month { return month_; } + constexpr auto day() const noexcept -> fmt::day { return day_; } +}; #endif -// A rudimentary weekday formatter. -template struct formatter { +template +struct formatter : private formatter { private: - bool localized = false; + bool localized_ = false; + bool use_tm_formatter_ = false; public: FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) -> decltype(ctx.begin()) { - auto begin = ctx.begin(), end = ctx.end(); - if (begin != end && *begin == 'L') { - ++begin; - localized = true; + auto it = ctx.begin(), end = ctx.end(); + if (it != end && *it == 'L') { + ++it; + localized_ = true; + return it; } - return begin; + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; } template auto format(weekday wd, FormatContext& ctx) const -> decltype(ctx.out()) { auto time = std::tm(); time.tm_wday = static_cast(wd.c_encoding()); - detail::get_locale loc(localized, ctx.locale()); + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(localized_, ctx.locale()); auto w = detail::tm_writer(loc, ctx.out(), time); w.on_abbr_weekday(); return w.out(); } }; -template -struct formatter, Char> { +template +struct formatter : private formatter { private: - basic_format_specs specs; - int precision = -1; - using arg_ref_type = detail::arg_ref; - arg_ref_type width_ref; - arg_ref_type precision_ref; - bool localized = false; - basic_string_view format_str; - using duration = std::chrono::duration; + bool use_tm_formatter_ = false; - struct spec_handler { - formatter& f; - basic_format_parse_context& context; - basic_string_view format_str; + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto it = ctx.begin(), end = ctx.end(); + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; + } - template FMT_CONSTEXPR arg_ref_type make_arg_ref(Id arg_id) { - context.check_arg_id(arg_id); - return arg_ref_type(arg_id); - } + template + auto format(day d, FormatContext& ctx) const -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_mday = static_cast(static_cast(d)); + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(false, ctx.locale()); + auto w = detail::tm_writer(loc, ctx.out(), time); + w.on_day_of_month(detail::numeric_system::standard, detail::pad_type::zero); + return w.out(); + } +}; - FMT_CONSTEXPR arg_ref_type make_arg_ref(basic_string_view arg_id) { - context.check_arg_id(arg_id); - return arg_ref_type(arg_id); - } +template +struct formatter : private formatter { + private: + bool localized_ = false; + bool use_tm_formatter_ = false; - FMT_CONSTEXPR arg_ref_type make_arg_ref(detail::auto_id) { - return arg_ref_type(context.next_arg_id()); + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto it = ctx.begin(), end = ctx.end(); + if (it != end && *it == 'L') { + ++it; + localized_ = true; + return it; } + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; + } - void on_error(const char* msg) { FMT_THROW(format_error(msg)); } - FMT_CONSTEXPR void on_fill(basic_string_view fill) { - f.specs.fill = fill; - } - FMT_CONSTEXPR void on_align(align_t align) { f.specs.align = align; } - FMT_CONSTEXPR void on_width(int width) { f.specs.width = width; } - FMT_CONSTEXPR void on_precision(int _precision) { - f.precision = _precision; - } - FMT_CONSTEXPR void end_precision() {} + template + auto format(month m, FormatContext& ctx) const -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_mon = static_cast(static_cast(m)) - 1; + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(localized_, ctx.locale()); + auto w = detail::tm_writer(loc, ctx.out(), time); + w.on_abbr_month(); + return w.out(); + } +}; - template FMT_CONSTEXPR void on_dynamic_width(Id arg_id) { - f.width_ref = make_arg_ref(arg_id); - } +template +struct formatter : private formatter { + private: + bool use_tm_formatter_ = false; - template FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) { - f.precision_ref = make_arg_ref(arg_id); - } - }; + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto it = ctx.begin(), end = ctx.end(); + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; + } - using iterator = typename basic_format_parse_context::iterator; - struct parse_range { - iterator begin; - iterator end; - }; + template + auto format(year y, FormatContext& ctx) const -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_year = static_cast(y) - 1900; + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(false, ctx.locale()); + auto w = detail::tm_writer(loc, ctx.out(), time); + w.on_year(detail::numeric_system::standard); + return w.out(); + } +}; - FMT_CONSTEXPR parse_range do_parse(basic_format_parse_context& ctx) { - auto begin = ctx.begin(), end = ctx.end(); - if (begin == end || *begin == '}') return {begin, begin}; - spec_handler handler{*this, ctx, format_str}; - begin = detail::parse_align(begin, end, handler); - if (begin == end) return {begin, begin}; - begin = detail::parse_width(begin, end, handler); - if (begin == end) return {begin, begin}; - if (*begin == '.') { - if (std::is_floating_point::value) - begin = detail::parse_precision(begin, end, handler); - else - handler.on_error("precision not allowed for this argument type"); - } - if (begin != end && *begin == 'L') { - ++begin; - localized = true; - } - end = detail::parse_chrono_format(begin, end, - detail::chrono_format_checker()); - return {begin, end}; +template +struct formatter : private formatter { + private: + bool use_tm_formatter_ = false; + + public: + FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) + -> decltype(ctx.begin()) { + auto it = ctx.begin(), end = ctx.end(); + use_tm_formatter_ = it != end && *it != '}'; + return use_tm_formatter_ ? 
formatter::parse(ctx) : it; } + template + auto format(year_month_day val, FormatContext& ctx) const + -> decltype(ctx.out()) { + auto time = std::tm(); + time.tm_year = static_cast(val.year()) - 1900; + time.tm_mon = static_cast(static_cast(val.month())) - 1; + time.tm_mday = static_cast(static_cast(val.day())); + if (use_tm_formatter_) return formatter::format(time, ctx); + detail::get_locale loc(true, ctx.locale()); + auto w = detail::tm_writer(loc, ctx.out(), time); + w.on_iso_date(); + return w.out(); + } +}; + +template +struct formatter, Char> { + private: + format_specs specs_; + detail::arg_ref width_ref_; + detail::arg_ref precision_ref_; + bool localized_ = false; + basic_string_view format_str_; + public: FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) -> decltype(ctx.begin()) { - auto range = do_parse(ctx); - format_str = basic_string_view( - &*range.begin, detail::to_unsigned(range.end - range.begin)); - return range.end; + auto it = ctx.begin(), end = ctx.end(); + if (it == end || *it == '}') return it; + + it = detail::parse_align(it, end, specs_); + if (it == end) return it; + + it = detail::parse_dynamic_spec(it, end, specs_.width, width_ref_, ctx); + if (it == end) return it; + + auto checker = detail::chrono_format_checker(); + if (*it == '.') { + checker.has_precision_integral = !std::is_floating_point::value; + it = detail::parse_precision(it, end, specs_.precision, precision_ref_, + ctx); + } + if (it != end && *it == 'L') { + localized_ = true; + ++it; + } + end = detail::parse_chrono_format(it, end, checker); + format_str_ = {it, detail::to_unsigned(end - it)}; + return end; } template - auto format(const duration& d, FormatContext& ctx) const + auto format(std::chrono::duration d, FormatContext& ctx) const -> decltype(ctx.out()) { - auto specs_copy = specs; - auto precision_copy = precision; - auto begin = format_str.begin(), end = format_str.end(); + auto specs = specs_; + auto precision = specs.precision; + specs.precision = 
-1; + auto begin = format_str_.begin(), end = format_str_.end(); // As a possible future optimization, we could avoid extra copying if width // is not specified. - basic_memory_buffer buf; + auto buf = basic_memory_buffer(); auto out = std::back_inserter(buf); - detail::handle_dynamic_spec(specs_copy.width, - width_ref, ctx); - detail::handle_dynamic_spec(precision_copy, - precision_ref, ctx); + detail::handle_dynamic_spec(specs.width, width_ref_, + ctx); + detail::handle_dynamic_spec(precision, + precision_ref_, ctx); if (begin == end || *begin == '}') { - out = detail::format_duration_value(out, d.count(), precision_copy); + out = detail::format_duration_value(out, d.count(), precision); detail::format_duration_unit(out); } else { - detail::chrono_formatter f( - ctx, out, d); - f.precision = precision_copy; - f.localized = localized; + using chrono_formatter = + detail::chrono_formatter; + auto f = chrono_formatter(ctx, out, d); + f.precision = precision; + f.localized = localized_; detail::parse_chrono_format(begin, end, f); } return detail::write( - ctx.out(), basic_string_view(buf.data(), buf.size()), specs_copy); + ctx.out(), basic_string_view(buf.data(), buf.size()), specs); } }; @@ -2011,87 +2304,129 @@ template struct formatter, Char> : formatter { FMT_CONSTEXPR formatter() { - basic_string_view default_specs = - detail::string_literal{}; - this->do_parse(default_specs.begin(), default_specs.end()); + this->format_str_ = detail::string_literal{}; } template - auto format(std::chrono::time_point val, + auto format(std::chrono::time_point val, FormatContext& ctx) const -> decltype(ctx.out()) { - return formatter::format(localtime(val), ctx); + std::tm tm = gmtime(val); + using period = typename Duration::period; + if (detail::const_check( + period::num == 1 && period::den == 1 && + !std::is_floating_point::value)) { + return formatter::format(tm, ctx); + } + Duration epoch = val.time_since_epoch(); + Duration subsecs = detail::fmt_duration_cast( + epoch - 
detail::fmt_duration_cast(epoch)); + if (subsecs.count() < 0) { + auto second = + detail::fmt_duration_cast(std::chrono::seconds(1)); + if (tm.tm_sec != 0) + --tm.tm_sec; + else + tm = gmtime(val - second); + subsecs += detail::fmt_duration_cast(std::chrono::seconds(1)); + } + return formatter::do_format(tm, ctx, &subsecs); } }; -#if FMT_USE_UTC_TIME +#if FMT_USE_LOCAL_TIME template -struct formatter, - Char> : formatter { +struct formatter, Char> + : formatter { FMT_CONSTEXPR formatter() { - basic_string_view default_specs = - detail::string_literal{}; - this->do_parse(default_specs.begin(), default_specs.end()); + this->format_str_ = detail::string_literal{}; } template - auto format(std::chrono::time_point val, + auto format(std::chrono::local_time val, FormatContext& ctx) const + -> decltype(ctx.out()) { + using period = typename Duration::period; + if (period::num != 1 || period::den != 1 || + std::is_floating_point::value) { + const auto epoch = val.time_since_epoch(); + const auto subsecs = detail::fmt_duration_cast( + epoch - detail::fmt_duration_cast(epoch)); + + return formatter::do_format(localtime(val), ctx, &subsecs); + } + + return formatter::format(localtime(val), ctx); + } +}; +#endif + +#if FMT_USE_UTC_TIME +template +struct formatter, + Char> + : formatter, + Char> { + template + auto format(std::chrono::time_point val, FormatContext& ctx) const -> decltype(ctx.out()) { - return formatter::format( - localtime(std::chrono::utc_clock::to_sys(val)), ctx); + return formatter< + std::chrono::time_point, + Char>::format(std::chrono::utc_clock::to_sys(val), ctx); } }; #endif template struct formatter { private: - enum class spec { - unknown, - year_month_day, - hh_mm_ss, - }; - spec spec_ = spec::unknown; - basic_string_view specs; + format_specs specs_; + detail::arg_ref width_ref_; protected: - template FMT_CONSTEXPR auto do_parse(It begin, It end) -> It { - if (begin != end && *begin == ':') ++begin; - end = detail::parse_chrono_format(begin, end, 
detail::tm_format_checker()); - // Replace default spec only if the new spec is not empty. - if (end != begin) specs = {begin, detail::to_unsigned(end - begin)}; - return end; + basic_string_view format_str_; + + template + auto do_format(const std::tm& tm, FormatContext& ctx, + const Duration* subsecs) const -> decltype(ctx.out()) { + auto specs = specs_; + auto buf = basic_memory_buffer(); + auto out = std::back_inserter(buf); + detail::handle_dynamic_spec(specs.width, width_ref_, + ctx); + + auto loc_ref = ctx.locale(); + detail::get_locale loc(static_cast(loc_ref), loc_ref); + auto w = + detail::tm_writer(loc, out, tm, subsecs); + detail::parse_chrono_format(format_str_.begin(), format_str_.end(), w); + return detail::write( + ctx.out(), basic_string_view(buf.data(), buf.size()), specs); } public: FMT_CONSTEXPR auto parse(basic_format_parse_context& ctx) -> decltype(ctx.begin()) { - auto end = this->do_parse(ctx.begin(), ctx.end()); - // basic_string_view<>::compare isn't constexpr before C++17. - if (specs.size() == 2 && specs[0] == Char('%')) { - if (specs[1] == Char('F')) - spec_ = spec::year_month_day; - else if (specs[1] == Char('T')) - spec_ = spec::hh_mm_ss; - } + auto it = ctx.begin(), end = ctx.end(); + if (it == end || *it == '}') return it; + + it = detail::parse_align(it, end, specs_); + if (it == end) return it; + + it = detail::parse_dynamic_spec(it, end, specs_.width, width_ref_, ctx); + if (it == end) return it; + + end = detail::parse_chrono_format(it, end, detail::tm_format_checker()); + // Replace the default format_str only if the new spec is not empty. 
+ if (end != it) format_str_ = {it, detail::to_unsigned(end - it)}; return end; } template auto format(const std::tm& tm, FormatContext& ctx) const -> decltype(ctx.out()) { - const auto loc_ref = ctx.locale(); - detail::get_locale loc(static_cast(loc_ref), loc_ref); - auto w = detail::tm_writer(loc, ctx.out(), tm); - if (spec_ == spec::year_month_day) - w.on_iso_date(); - else if (spec_ == spec::hh_mm_ss) - w.on_iso_time(); - else - detail::parse_chrono_format(specs.begin(), specs.end(), w); - return w.out(); + return do_format(tm, ctx, nullptr); } }; -FMT_MODULE_EXPORT_END +FMT_END_EXPORT FMT_END_NAMESPACE #endif // FMT_CHRONO_H_ diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/color.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/color.h index e9b880ad431c..f0e9dd94ef3a 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/color.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/color.h @@ -11,7 +11,7 @@ #include "format.h" FMT_BEGIN_NAMESPACE -FMT_MODULE_EXPORT_BEGIN +FMT_BEGIN_EXPORT enum class color : uint32_t { alice_blue = 0xF0F8FF, // rgb(240,248,255) @@ -203,7 +203,7 @@ struct rgb { uint8_t b; }; -FMT_BEGIN_DETAIL_NAMESPACE +namespace detail { // color is a struct of either a rgb color or a terminal color. struct color_type { @@ -225,22 +225,21 @@ struct color_type { uint32_t rgb_color; } value; }; +} // namespace detail -FMT_END_DETAIL_NAMESPACE - -/** A text style consisting of foreground and background colors and emphasis. */ +/// A text style consisting of foreground and background colors and emphasis. 
class text_style { public: FMT_CONSTEXPR text_style(emphasis em = emphasis()) noexcept : set_foreground_color(), set_background_color(), ems(em) {} - FMT_CONSTEXPR text_style& operator|=(const text_style& rhs) { + FMT_CONSTEXPR auto operator|=(const text_style& rhs) -> text_style& { if (!set_foreground_color) { set_foreground_color = rhs.set_foreground_color; foreground_color = rhs.foreground_color; } else if (rhs.set_foreground_color) { if (!foreground_color.is_rgb || !rhs.foreground_color.is_rgb) - FMT_THROW(format_error("can't OR a terminal color")); + report_error("can't OR a terminal color"); foreground_color.value.rgb_color |= rhs.foreground_color.value.rgb_color; } @@ -249,7 +248,7 @@ class text_style { background_color = rhs.background_color; } else if (rhs.set_background_color) { if (!background_color.is_rgb || !rhs.background_color.is_rgb) - FMT_THROW(format_error("can't OR a terminal color")); + report_error("can't OR a terminal color"); background_color.value.rgb_color |= rhs.background_color.value.rgb_color; } @@ -258,29 +257,29 @@ class text_style { return *this; } - friend FMT_CONSTEXPR text_style operator|(text_style lhs, - const text_style& rhs) { + friend FMT_CONSTEXPR auto operator|(text_style lhs, const text_style& rhs) + -> text_style { return lhs |= rhs; } - FMT_CONSTEXPR bool has_foreground() const noexcept { + FMT_CONSTEXPR auto has_foreground() const noexcept -> bool { return set_foreground_color; } - FMT_CONSTEXPR bool has_background() const noexcept { + FMT_CONSTEXPR auto has_background() const noexcept -> bool { return set_background_color; } - FMT_CONSTEXPR bool has_emphasis() const noexcept { + FMT_CONSTEXPR auto has_emphasis() const noexcept -> bool { return static_cast(ems) != 0; } - FMT_CONSTEXPR detail::color_type get_foreground() const noexcept { + FMT_CONSTEXPR auto get_foreground() const noexcept -> detail::color_type { FMT_ASSERT(has_foreground(), "no foreground specified for this style"); return foreground_color; } - 
FMT_CONSTEXPR detail::color_type get_background() const noexcept { + FMT_CONSTEXPR auto get_background() const noexcept -> detail::color_type { FMT_ASSERT(has_background(), "no background specified for this style"); return background_color; } - FMT_CONSTEXPR emphasis get_emphasis() const noexcept { + FMT_CONSTEXPR auto get_emphasis() const noexcept -> emphasis { FMT_ASSERT(has_emphasis(), "no emphasis specified for this style"); return ems; } @@ -298,9 +297,11 @@ class text_style { } } - friend FMT_CONSTEXPR text_style fg(detail::color_type foreground) noexcept; + friend FMT_CONSTEXPR auto fg(detail::color_type foreground) noexcept + -> text_style; - friend FMT_CONSTEXPR text_style bg(detail::color_type background) noexcept; + friend FMT_CONSTEXPR auto bg(detail::color_type background) noexcept + -> text_style; detail::color_type foreground_color; detail::color_type background_color; @@ -309,21 +310,24 @@ class text_style { emphasis ems; }; -/** Creates a text style from the foreground (text) color. */ -FMT_CONSTEXPR inline text_style fg(detail::color_type foreground) noexcept { +/// Creates a text style from the foreground (text) color. +FMT_CONSTEXPR inline auto fg(detail::color_type foreground) noexcept + -> text_style { return text_style(true, foreground); } -/** Creates a text style from the background color. */ -FMT_CONSTEXPR inline text_style bg(detail::color_type background) noexcept { +/// Creates a text style from the background color. 
+FMT_CONSTEXPR inline auto bg(detail::color_type background) noexcept + -> text_style { return text_style(false, background); } -FMT_CONSTEXPR inline text_style operator|(emphasis lhs, emphasis rhs) noexcept { +FMT_CONSTEXPR inline auto operator|(emphasis lhs, emphasis rhs) noexcept + -> text_style { return text_style(lhs) | rhs; } -FMT_BEGIN_DETAIL_NAMESPACE +namespace detail { template struct ansi_color_escape { FMT_CONSTEXPR ansi_color_escape(detail::color_type text_color, @@ -385,9 +389,9 @@ template struct ansi_color_escape { } FMT_CONSTEXPR operator const Char*() const noexcept { return buffer; } - FMT_CONSTEXPR const Char* begin() const noexcept { return buffer; } - FMT_CONSTEXPR_CHAR_TRAITS const Char* end() const noexcept { - return buffer + std::char_traits::length(buffer); + FMT_CONSTEXPR auto begin() const noexcept -> const Char* { return buffer; } + FMT_CONSTEXPR20 auto end() const noexcept -> const Char* { + return buffer + basic_string_view(buffer).size(); } private: @@ -401,66 +405,45 @@ template struct ansi_color_escape { out[2] = static_cast('0' + c % 10); out[3] = static_cast(delimiter); } - static FMT_CONSTEXPR bool has_emphasis(emphasis em, emphasis mask) noexcept { + static FMT_CONSTEXPR auto has_emphasis(emphasis em, emphasis mask) noexcept + -> bool { return static_cast(em) & static_cast(mask); } }; template -FMT_CONSTEXPR ansi_color_escape make_foreground_color( - detail::color_type foreground) noexcept { +FMT_CONSTEXPR auto make_foreground_color(detail::color_type foreground) noexcept + -> ansi_color_escape { return ansi_color_escape(foreground, "\x1b[38;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_background_color( - detail::color_type background) noexcept { +FMT_CONSTEXPR auto make_background_color(detail::color_type background) noexcept + -> ansi_color_escape { return ansi_color_escape(background, "\x1b[48;2;"); } template -FMT_CONSTEXPR ansi_color_escape make_emphasis(emphasis em) noexcept { +FMT_CONSTEXPR auto 
make_emphasis(emphasis em) noexcept + -> ansi_color_escape { return ansi_color_escape(em); } -template inline void fputs(const Char* chars, FILE* stream) { - int result = std::fputs(chars, stream); -#if !__NVCC__ - if (result < 0) - FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); -#endif -} - -template <> inline void fputs(const wchar_t* chars, FILE* stream) { - int result = std::fputws(chars, stream); -#if !__NVCC__ - if (result < 0) - FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); -#endif -} - -template inline void reset_color(FILE* stream) { - fputs("\x1b[0m", stream); -} - -template <> inline void reset_color(FILE* stream) { - fputs(L"\x1b[0m", stream); -} - template inline void reset_color(buffer& buffer) { auto reset_color = string_view("\x1b[0m"); buffer.append(reset_color.begin(), reset_color.end()); } -template struct styled_arg { +template struct styled_arg : detail::view { const T& value; text_style style; + styled_arg(const T& v, text_style s) : value(v), style(s) {} }; template -void vformat_to(buffer& buf, const text_style& ts, - basic_string_view format_str, - basic_format_args>> args) { +void vformat_to( + buffer& buf, const text_style& ts, basic_string_view format_str, + basic_format_args>> args) { bool has_style = false; if (ts.has_emphasis()) { has_style = true; @@ -481,118 +464,94 @@ void vformat_to(buffer& buf, const text_style& ts, if (has_style) detail::reset_color(buf); } -FMT_END_DETAIL_NAMESPACE - -template > -void vprint(std::FILE* f, const text_style& ts, const S& format, - basic_format_args>> args) { - basic_memory_buffer buf; - detail::vformat_to(buf, ts, detail::to_string_view(format), args); - if (detail::is_utf8()) { - detail::print(f, basic_string_view(buf.begin(), buf.size())); - } else { - buf.push_back(Char(0)); - detail::fputs(buf.data(), f); - } +} // namespace detail + +inline void vprint(FILE* f, const text_style& ts, string_view fmt, + format_args args) { + auto buf = 
memory_buffer(); + detail::vformat_to(buf, ts, fmt, args); + print(f, FMT_STRING("{}"), string_view(buf.begin(), buf.size())); } /** - \rst - Formats a string and prints it to the specified file stream using ANSI - escape sequences to specify text formatting. - - **Example**:: - - fmt::print(fmt::emphasis::bold | fg(fmt::color::red), - "Elapsed time: {0:.2f} seconds", 1.23); - \endrst + * Formats a string and prints it to the specified file stream using ANSI + * escape sequences to specify text formatting. + * + * **Example**: + * + * fmt::print(fmt::emphasis::bold | fg(fmt::color::red), + * "Elapsed time: {0:.2f} seconds", 1.23); */ -template ::value)> -void print(std::FILE* f, const text_style& ts, const S& format_str, - const Args&... args) { - vprint(f, ts, format_str, - fmt::make_format_args>>(args...)); +template +void print(FILE* f, const text_style& ts, format_string fmt, + T&&... args) { + vprint(f, ts, fmt, fmt::make_format_args(args...)); } /** - \rst - Formats a string and prints it to stdout using ANSI escape sequences to - specify text formatting. - - **Example**:: - - fmt::print(fmt::emphasis::bold | fg(fmt::color::red), - "Elapsed time: {0:.2f} seconds", 1.23); - \endrst + * Formats a string and prints it to stdout using ANSI escape sequences to + * specify text formatting. + * + * **Example**: + * + * fmt::print(fmt::emphasis::bold | fg(fmt::color::red), + * "Elapsed time: {0:.2f} seconds", 1.23); */ -template ::value)> -void print(const text_style& ts, const S& format_str, const Args&... args) { - return print(stdout, ts, format_str, args...); +template +void print(const text_style& ts, format_string fmt, T&&... 
args) { + return print(stdout, ts, fmt, std::forward(args)...); } -template > -inline std::basic_string vformat( - const text_style& ts, const S& format_str, - basic_format_args>> args) { - basic_memory_buffer buf; - detail::vformat_to(buf, ts, detail::to_string_view(format_str), args); +inline auto vformat(const text_style& ts, string_view fmt, format_args args) + -> std::string { + auto buf = memory_buffer(); + detail::vformat_to(buf, ts, fmt, args); return fmt::to_string(buf); } /** - \rst - Formats arguments and returns the result as a string using ANSI - escape sequences to specify text formatting. - - **Example**:: - - #include - std::string message = fmt::format(fmt::emphasis::bold | fg(fmt::color::red), - "The answer is {}", 42); - \endrst -*/ -template > -inline std::basic_string format(const text_style& ts, const S& format_str, - const Args&... args) { - return fmt::vformat(ts, detail::to_string_view(format_str), - fmt::make_format_args>(args...)); + * Formats arguments and returns the result as a string using ANSI escape + * sequences to specify text formatting. + * + * **Example**: + * + * ``` + * #include + * std::string message = fmt::format(fmt::emphasis::bold | fg(fmt::color::red), + * "The answer is {}", 42); + * ``` + */ +template +inline auto format(const text_style& ts, format_string fmt, T&&... args) + -> std::string { + return fmt::vformat(ts, fmt, fmt::make_format_args(args...)); } -/** - Formats a string with the given text_style and writes the output to ``out``. - */ -template ::value)> -OutputIt vformat_to( - OutputIt out, const text_style& ts, basic_string_view format_str, - basic_format_args>> args) { - auto&& buf = detail::get_buffer(out); - detail::vformat_to(buf, ts, format_str, args); +/// Formats a string with the given text_style and writes the output to `out`. 
+template ::value)> +auto vformat_to(OutputIt out, const text_style& ts, string_view fmt, + format_args args) -> OutputIt { + auto&& buf = detail::get_buffer(out); + detail::vformat_to(buf, ts, fmt, args); return detail::get_iterator(buf, out); } /** - \rst - Formats arguments with the given text_style, writes the result to the output - iterator ``out`` and returns the iterator past the end of the output range. - - **Example**:: - - std::vector out; - fmt::format_to(std::back_inserter(out), - fmt::emphasis::bold | fg(fmt::color::red), "{}", 42); - \endrst -*/ -template >::value&& - detail::is_string::value> -inline auto format_to(OutputIt out, const text_style& ts, const S& format_str, - Args&&... args) -> - typename std::enable_if::type { - return vformat_to(out, ts, detail::to_string_view(format_str), - fmt::make_format_args>>(args...)); + * Formats arguments with the given text style, writes the result to the output + * iterator `out` and returns the iterator past the end of the output range. + * + * **Example**: + * + * std::vector out; + * fmt::format_to(std::back_inserter(out), + * fmt::emphasis::bold | fg(fmt::color::red), "{}", 42); + */ +template ::value)> +inline auto format_to(OutputIt out, const text_style& ts, + format_string fmt, T&&... args) -> OutputIt { + return vformat_to(out, ts, fmt, fmt::make_format_args(args...)); } template @@ -632,16 +591,14 @@ struct formatter, Char> : formatter { }; /** - \rst - Returns an argument that will be formatted using ANSI escape sequences, - to be used in a formatting function. - - **Example**:: - - fmt::print("Elapsed time: {0:.2f} seconds", - fmt::styled(1.23, fmt::fg(fmt::color::green) | - fmt::bg(fmt::color::blue))); - \endrst + * Returns an argument that will be formatted using ANSI escape sequences, + * to be used in a formatting function. 
+ * + * **Example**: + * + * fmt::print("Elapsed time: {0:.2f} seconds", + * fmt::styled(1.23, fmt::fg(fmt::color::green) | + * fmt::bg(fmt::color::blue))); */ template FMT_CONSTEXPR auto styled(const T& value, text_style ts) @@ -649,7 +606,7 @@ FMT_CONSTEXPR auto styled(const T& value, text_style ts) return detail::styled_arg>{value, ts}; } -FMT_MODULE_EXPORT_END +FMT_END_EXPORT FMT_END_NAMESPACE #endif // FMT_COLOR_H_ diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/compile.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/compile.h index 933668c41c3e..b2afc2c309f4 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/compile.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/compile.h @@ -8,117 +8,41 @@ #ifndef FMT_COMPILE_H_ #define FMT_COMPILE_H_ +#ifndef FMT_MODULE +# include // std::back_inserter +#endif + #include "format.h" FMT_BEGIN_NAMESPACE + +// A compile-time string which is compiled into fast formatting code. +FMT_EXPORT class compiled_string {}; + namespace detail { -template -FMT_CONSTEXPR inline counting_iterator copy_str(InputIt begin, InputIt end, - counting_iterator it) { +template +FMT_CONSTEXPR inline auto copy(InputIt begin, InputIt end, counting_iterator it) + -> counting_iterator { return it + (end - begin); } -template class truncating_iterator_base { - protected: - OutputIt out_; - size_t limit_; - size_t count_ = 0; - - truncating_iterator_base() : out_(), limit_(0) {} - - truncating_iterator_base(OutputIt out, size_t limit) - : out_(out), limit_(limit) {} - - public: - using iterator_category = std::output_iterator_tag; - using value_type = typename std::iterator_traits::value_type; - using difference_type = std::ptrdiff_t; - using pointer = void; - using reference = void; - FMT_UNCHECKED_ITERATOR(truncating_iterator_base); - - OutputIt base() const { return out_; } - size_t count() const { return count_; } -}; - -// An output iterator that truncates the output and counts the 
number of objects -// written to it. -template ::value_type>::type> -class truncating_iterator; - -template -class truncating_iterator - : public truncating_iterator_base { - mutable typename truncating_iterator_base::value_type blackhole_; - - public: - using value_type = typename truncating_iterator_base::value_type; - - truncating_iterator() = default; - - truncating_iterator(OutputIt out, size_t limit) - : truncating_iterator_base(out, limit) {} - - truncating_iterator& operator++() { - if (this->count_++ < this->limit_) ++this->out_; - return *this; - } - - truncating_iterator operator++(int) { - auto it = *this; - ++*this; - return it; - } - - value_type& operator*() const { - return this->count_ < this->limit_ ? *this->out_ : blackhole_; - } -}; - -template -class truncating_iterator - : public truncating_iterator_base { - public: - truncating_iterator() = default; - - truncating_iterator(OutputIt out, size_t limit) - : truncating_iterator_base(out, limit) {} - - template truncating_iterator& operator=(T val) { - if (this->count_++ < this->limit_) *this->out_++ = val; - return *this; - } - - truncating_iterator& operator++() { return *this; } - truncating_iterator& operator++(int) { return *this; } - truncating_iterator& operator*() { return *this; } -}; - -// A compile-time string which is compiled into fast formatting code. -class compiled_string {}; - template struct is_compiled_string : std::is_base_of {}; /** - \rst - Converts a string literal *s* into a format string that will be parsed at - compile time and converted into efficient formatting code. Requires C++17 - ``constexpr if`` compiler support. - - **Example**:: - - // Converts 42 into std::string using the most efficient method and no - // runtime format string processing. - std::string s = fmt::format(FMT_COMPILE("{}"), 42); - \endrst + * Converts a string literal `s` into a format string that will be parsed at + * compile time and converted into efficient formatting code. 
Requires C++17 + * `constexpr if` compiler support. + * + * **Example**: + * + * // Converts 42 into std::string using the most efficient method and no + * // runtime format string processing. + * std::string s = fmt::format(FMT_COMPILE("{}"), 42); */ #if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) -# define FMT_COMPILE(s) \ - FMT_STRING_IMPL(s, fmt::detail::compiled_string, explicit) +# define FMT_COMPILE(s) FMT_STRING_IMPL(s, fmt::compiled_string, explicit) #else # define FMT_COMPILE(s) FMT_STRING(s) #endif @@ -135,7 +59,7 @@ struct udl_compiled_string : compiled_string { #endif template -const T& first(const T& value, const Tail&...) { +auto first(const T& value, const Tail&...) -> const T& { return value; } @@ -196,7 +120,8 @@ template struct code_unit { template constexpr OutputIt format(OutputIt out, const Args&...) const { - return write(out, value); + *out++ = value; + return out; } }; @@ -220,7 +145,12 @@ template struct field { template constexpr OutputIt format(OutputIt out, const Args&... 
args) const { - return write(out, get_arg_checked(args...)); + const T& arg = get_arg_checked(args...); + if constexpr (std::is_convertible>::value) { + auto s = basic_string_view(arg); + return copy(s.begin(), s.end(), out); + } + return write(out, arg); } }; @@ -308,13 +238,12 @@ constexpr size_t parse_text(basic_string_view str, size_t pos) { } template -constexpr auto compile_format_string(S format_str); +constexpr auto compile_format_string(S fmt); template -constexpr auto parse_tail(T head, S format_str) { - if constexpr (POS != - basic_string_view(format_str).size()) { - constexpr auto tail = compile_format_string(format_str); +constexpr auto parse_tail(T head, S fmt) { + if constexpr (POS != basic_string_view(fmt).size()) { + constexpr auto tail = compile_format_string(fmt); if constexpr (std::is_same, unknown_format>()) return tail; @@ -331,14 +260,14 @@ template struct parse_specs_result { int next_arg_id; }; -constexpr int manual_indexing_id = -1; +enum { manual_indexing_id = -1 }; template constexpr parse_specs_result parse_specs(basic_string_view str, size_t pos, int next_arg_id) { str.remove_prefix(pos); - auto ctx = compile_parse_context(str, max_value(), nullptr, {}, - next_arg_id); + auto ctx = + compile_parse_context(str, max_value(), nullptr, next_arg_id); auto f = formatter(); auto end = f.parse(ctx); return {f, pos + fmt::detail::to_unsigned(end - str.data()), @@ -348,22 +277,18 @@ constexpr parse_specs_result parse_specs(basic_string_view str, template struct arg_id_handler { arg_ref arg_id; - constexpr int operator()() { + constexpr int on_auto() { FMT_ASSERT(false, "handler cannot be used with automatic indexing"); return 0; } - constexpr int operator()(int id) { + constexpr int on_index(int id) { arg_id = arg_ref(id); return 0; } - constexpr int operator()(basic_string_view id) { + constexpr int on_name(basic_string_view id) { arg_id = arg_ref(id); return 0; } - - constexpr void on_error(const char* message) { - 
FMT_THROW(format_error(message)); - } }; template struct parse_arg_id_result { @@ -389,14 +314,13 @@ struct field_type::value>> { template -constexpr auto parse_replacement_field_then_tail(S format_str) { +constexpr auto parse_replacement_field_then_tail(S fmt) { using char_type = typename S::char_type; - constexpr auto str = basic_string_view(format_str); + constexpr auto str = basic_string_view(fmt); constexpr char_type c = END_POS != str.size() ? str[END_POS] : char_type(); if constexpr (c == '}') { return parse_tail( - field::type, ARG_INDEX>(), - format_str); + field::type, ARG_INDEX>(), fmt); } else if constexpr (c != ':') { FMT_THROW(format_error("expected ':'")); } else { @@ -409,7 +333,7 @@ constexpr auto parse_replacement_field_then_tail(S format_str) { return parse_tail( spec_field::type, ARG_INDEX>{ result.fmt}, - format_str); + fmt); } } } @@ -417,22 +341,21 @@ constexpr auto parse_replacement_field_then_tail(S format_str) { // Compiles a non-empty format string and returns the compiled representation // or unknown_format() on unrecognized input. template -constexpr auto compile_format_string(S format_str) { +constexpr auto compile_format_string(S fmt) { using char_type = typename S::char_type; - constexpr auto str = basic_string_view(format_str); + constexpr auto str = basic_string_view(fmt); if constexpr (str[POS] == '{') { if constexpr (POS + 1 == str.size()) FMT_THROW(format_error("unmatched '{' in format string")); if constexpr (str[POS + 1] == '{') { - return parse_tail(make_text(str, POS, 1), format_str); + return parse_tail(make_text(str, POS, 1), fmt); } else if constexpr (str[POS + 1] == '}' || str[POS + 1] == ':') { static_assert(ID != manual_indexing_id, "cannot switch from manual to automatic argument indexing"); constexpr auto next_id = ID != manual_indexing_id ? 
ID + 1 : manual_indexing_id; return parse_replacement_field_then_tail, Args, - POS + 1, ID, next_id>( - format_str); + POS + 1, ID, next_id>(fmt); } else { constexpr auto arg_id_result = parse_arg_id(str.data() + POS + 1, str.data() + str.size()); @@ -448,60 +371,55 @@ constexpr auto compile_format_string(S format_str) { return parse_replacement_field_then_tail, Args, arg_id_end_pos, arg_index, manual_indexing_id>( - format_str); + fmt); } else if constexpr (arg_id_result.arg_id.kind == arg_id_kind::name) { constexpr auto arg_index = get_arg_index_by_name(arg_id_result.arg_id.val.name, Args{}); - if constexpr (arg_index != invalid_arg_index) { + if constexpr (arg_index >= 0) { constexpr auto next_id = ID != manual_indexing_id ? ID + 1 : manual_indexing_id; return parse_replacement_field_then_tail< decltype(get_type::value), Args, arg_id_end_pos, - arg_index, next_id>(format_str); - } else { - if constexpr (c == '}') { - return parse_tail( - runtime_named_field{arg_id_result.arg_id.val.name}, - format_str); - } else if constexpr (c == ':') { - return unknown_format(); // no type info for specs parsing - } + arg_index, next_id>(fmt); + } else if constexpr (c == '}') { + return parse_tail( + runtime_named_field{arg_id_result.arg_id.val.name}, + fmt); + } else if constexpr (c == ':') { + return unknown_format(); // no type info for specs parsing } } } } else if constexpr (str[POS] == '}') { if constexpr (POS + 1 == str.size()) FMT_THROW(format_error("unmatched '}' in format string")); - return parse_tail(make_text(str, POS, 1), format_str); + return parse_tail(make_text(str, POS, 1), fmt); } else { constexpr auto end = parse_text(str, POS + 1); if constexpr (end - POS > 1) { - return parse_tail(make_text(str, POS, end - POS), - format_str); + return parse_tail(make_text(str, POS, end - POS), fmt); } else { - return parse_tail(code_unit{str[POS]}, - format_str); + return parse_tail(code_unit{str[POS]}, fmt); } } } template ::value)> -constexpr auto compile(S format_str) 
{ - constexpr auto str = basic_string_view(format_str); +constexpr auto compile(S fmt) { + constexpr auto str = basic_string_view(fmt); if constexpr (str.size() == 0) { return detail::make_text(str, 0, 0); } else { constexpr auto result = - detail::compile_format_string, 0, 0>( - format_str); + detail::compile_format_string, 0, 0>(fmt); return result; } } #endif // defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) } // namespace detail -FMT_MODULE_EXPORT_BEGIN +FMT_BEGIN_EXPORT #if defined(__cpp_if_constexpr) && defined(__cpp_return_type_deduction) @@ -566,33 +484,33 @@ FMT_CONSTEXPR OutputIt format_to(OutputIt out, const S&, Args&&... args) { template ::value)> -format_to_n_result format_to_n(OutputIt out, size_t n, - const S& format_str, Args&&... args) { - auto it = fmt::format_to(detail::truncating_iterator(out, n), - format_str, std::forward(args)...); - return {it.base(), it.count()}; +auto format_to_n(OutputIt out, size_t n, const S& fmt, Args&&... args) + -> format_to_n_result { + using traits = detail::fixed_buffer_traits; + auto buf = detail::iterator_buffer(out, n); + fmt::format_to(std::back_inserter(buf), fmt, std::forward(args)...); + return {buf.out(), buf.count()}; } template ::value)> -FMT_CONSTEXPR20 size_t formatted_size(const S& format_str, - const Args&... args) { - return fmt::format_to(detail::counting_iterator(), format_str, args...) - .count(); +FMT_CONSTEXPR20 auto formatted_size(const S& fmt, const Args&... args) + -> size_t { + return fmt::format_to(detail::counting_iterator(), fmt, args...).count(); } template ::value)> -void print(std::FILE* f, const S& format_str, const Args&... args) { +void print(std::FILE* f, const S& fmt, const Args&... args) { memory_buffer buffer; - fmt::format_to(std::back_inserter(buffer), format_str, args...); + fmt::format_to(std::back_inserter(buffer), fmt, args...); detail::print(f, {buffer.data(), buffer.size()}); } template ::value)> -void print(const S& format_str, const Args&... 
args) { - print(stdout, format_str, args...); +void print(const S& fmt, const Args&... args) { + print(stdout, fmt, args...); } #if FMT_USE_NONTYPE_TEMPLATE_ARGS @@ -605,7 +523,7 @@ template constexpr auto operator""_cf() { } // namespace literals #endif -FMT_MODULE_EXPORT_END +FMT_END_EXPORT FMT_END_NAMESPACE #endif // FMT_COMPILE_H_ diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/core.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/core.h index af61b22c44ec..8ca735f0c004 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/core.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/core.h @@ -1,3343 +1,5 @@ -// Formatting library for C++ - the core API for char/UTF-8 -// -// Copyright (c) 2012 - present, Victor Zverovich -// All rights reserved. -// -// For the license information refer to format.h. +// This file is only provided for compatibility and may be removed in future +// versions. Use fmt/base.h if you don't need fmt::format and fmt/format.h +// otherwise. -#ifndef FMT_CORE_H_ -#define FMT_CORE_H_ - -#include // std::byte -#include // std::FILE -#include // std::strlen -#include -#include -#include -#include - -// The fmt library version in the form major * 10000 + minor * 100 + patch. -#define FMT_VERSION 90101 - -#define FMT_HEADER_ONLY - -#if defined(__clang__) && !defined(__ibmxl__) -# define FMT_CLANG_VERSION (__clang_major__ * 100 + __clang_minor__) -#else -# define FMT_CLANG_VERSION 0 -#endif - -#if defined(__GNUC__) && !defined(__clang__) && !defined(__INTEL_COMPILER) && \ - !defined(__NVCOMPILER) -# define FMT_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) -#else -# define FMT_GCC_VERSION 0 -#endif - -#ifndef FMT_GCC_PRAGMA -// Workaround _Pragma bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=59884. 
-# if FMT_GCC_VERSION >= 504 -# define FMT_GCC_PRAGMA(arg) _Pragma(arg) -# else -# define FMT_GCC_PRAGMA(arg) -# endif -#endif - -#ifdef __ICL -# define FMT_ICC_VERSION __ICL -#elif defined(__INTEL_COMPILER) -# define FMT_ICC_VERSION __INTEL_COMPILER -#else -# define FMT_ICC_VERSION 0 -#endif - -#ifdef _MSC_VER -# define FMT_MSC_VERSION _MSC_VER -# define FMT_MSC_WARNING(...) __pragma(warning(__VA_ARGS__)) -#else -# define FMT_MSC_VERSION 0 -# define FMT_MSC_WARNING(...) -#endif - -#ifdef _MSVC_LANG -# define FMT_CPLUSPLUS _MSVC_LANG -#else -# define FMT_CPLUSPLUS __cplusplus -#endif - -#ifdef __has_feature -# define FMT_HAS_FEATURE(x) __has_feature(x) -#else -# define FMT_HAS_FEATURE(x) 0 -#endif - -#if defined(__has_include) || FMT_ICC_VERSION >= 1600 || FMT_MSC_VERSION > 1900 -# define FMT_HAS_INCLUDE(x) __has_include(x) -#else -# define FMT_HAS_INCLUDE(x) 0 -#endif - -#ifdef __has_cpp_attribute -# define FMT_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) -#else -# define FMT_HAS_CPP_ATTRIBUTE(x) 0 -#endif - -#define FMT_HAS_CPP14_ATTRIBUTE(attribute) \ - (FMT_CPLUSPLUS >= 201402L && FMT_HAS_CPP_ATTRIBUTE(attribute)) - -#define FMT_HAS_CPP17_ATTRIBUTE(attribute) \ - (FMT_CPLUSPLUS >= 201703L && FMT_HAS_CPP_ATTRIBUTE(attribute)) - -// Check if relaxed C++14 constexpr is supported. -// GCC doesn't allow throw in constexpr until version 6 (bug 67371). 
-#ifndef FMT_USE_CONSTEXPR -# if (FMT_HAS_FEATURE(cxx_relaxed_constexpr) || FMT_MSC_VERSION >= 1912 || \ - (FMT_GCC_VERSION >= 600 && FMT_CPLUSPLUS >= 201402L)) && \ - !FMT_ICC_VERSION && !defined(__NVCC__) -# define FMT_USE_CONSTEXPR 1 -# else -# define FMT_USE_CONSTEXPR 0 -# endif -#endif -#if FMT_USE_CONSTEXPR -# define FMT_CONSTEXPR constexpr -#else -# define FMT_CONSTEXPR -#endif - -#if ((FMT_CPLUSPLUS >= 202002L) && \ - (!defined(_GLIBCXX_RELEASE) || _GLIBCXX_RELEASE > 9)) || \ - (FMT_CPLUSPLUS >= 201709L && FMT_GCC_VERSION >= 1002) -# define FMT_CONSTEXPR20 constexpr -#else -# define FMT_CONSTEXPR20 -#endif - -// Check if constexpr std::char_traits<>::{compare,length} are supported. -#if defined(__GLIBCXX__) -# if FMT_CPLUSPLUS >= 201703L && defined(_GLIBCXX_RELEASE) && \ - _GLIBCXX_RELEASE >= 7 // GCC 7+ libstdc++ has _GLIBCXX_RELEASE. -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -# endif -#elif defined(_LIBCPP_VERSION) && FMT_CPLUSPLUS >= 201703L && \ - _LIBCPP_VERSION >= 4000 -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#elif FMT_MSC_VERSION >= 1914 && FMT_CPLUSPLUS >= 201703L -# define FMT_CONSTEXPR_CHAR_TRAITS constexpr -#endif -#ifndef FMT_CONSTEXPR_CHAR_TRAITS -# define FMT_CONSTEXPR_CHAR_TRAITS -#endif - -// Check if exceptions are disabled. -#ifndef FMT_EXCEPTIONS -# if (defined(__GNUC__) && !defined(__EXCEPTIONS)) || \ - (FMT_MSC_VERSION && !_HAS_EXCEPTIONS) -# define FMT_EXCEPTIONS 0 -# else -# define FMT_EXCEPTIONS 1 -# endif -#endif - -#ifndef FMT_DEPRECATED -# if FMT_HAS_CPP14_ATTRIBUTE(deprecated) || FMT_MSC_VERSION >= 1900 -# define FMT_DEPRECATED [[deprecated]] -# else -# if (defined(__GNUC__) && !defined(__LCC__)) || defined(__clang__) -# define FMT_DEPRECATED __attribute__((deprecated)) -# elif FMT_MSC_VERSION -# define FMT_DEPRECATED __declspec(deprecated) -# else -# define FMT_DEPRECATED /* deprecated */ -# endif -# endif -#endif - -// [[noreturn]] is disabled on MSVC and NVCC because of bogus unreachable code -// warnings. 
-#if FMT_EXCEPTIONS && FMT_HAS_CPP_ATTRIBUTE(noreturn) && !FMT_MSC_VERSION && \ - !defined(__NVCC__) -# define FMT_NORETURN [[noreturn]] -#else -# define FMT_NORETURN -#endif - -#if FMT_HAS_CPP17_ATTRIBUTE(fallthrough) -# define FMT_FALLTHROUGH [[fallthrough]] -#elif defined(__clang__) -# define FMT_FALLTHROUGH [[clang::fallthrough]] -#elif FMT_GCC_VERSION >= 700 && \ - (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= 520) -# define FMT_FALLTHROUGH [[gnu::fallthrough]] -#else -# define FMT_FALLTHROUGH -#endif - -#ifndef FMT_NODISCARD -# if FMT_HAS_CPP17_ATTRIBUTE(nodiscard) -# define FMT_NODISCARD [[nodiscard]] -# else -# define FMT_NODISCARD -# endif -#endif - -#ifndef FMT_USE_FLOAT -# define FMT_USE_FLOAT 1 -#endif -#ifndef FMT_USE_DOUBLE -# define FMT_USE_DOUBLE 1 -#endif -#ifndef FMT_USE_LONG_DOUBLE -# define FMT_USE_LONG_DOUBLE 1 -#endif - -#ifndef FMT_INLINE -# if FMT_GCC_VERSION || FMT_CLANG_VERSION -# define FMT_INLINE inline __attribute__((always_inline)) -# else -# define FMT_INLINE inline -# endif -#endif - -// An inline std::forward replacement. -#define FMT_FORWARD(...) static_cast(__VA_ARGS__) - -#ifdef _MSC_VER -# define FMT_UNCHECKED_ITERATOR(It) \ - using _Unchecked_type = It // Mark iterator as checked. 
-#else -# define FMT_UNCHECKED_ITERATOR(It) using unchecked_type = It -#endif - -#ifndef FMT_BEGIN_NAMESPACE -# define FMT_BEGIN_NAMESPACE \ - namespace fmt { \ - inline namespace v9 { -# define FMT_END_NAMESPACE \ - } \ - } -#endif - -#ifndef FMT_MODULE_EXPORT -# define FMT_MODULE_EXPORT -# define FMT_MODULE_EXPORT_BEGIN -# define FMT_MODULE_EXPORT_END -# define FMT_BEGIN_DETAIL_NAMESPACE namespace detail { -# define FMT_END_DETAIL_NAMESPACE } -#endif - -#if !defined(FMT_HEADER_ONLY) && defined(_WIN32) -# define FMT_CLASS_API FMT_MSC_WARNING(suppress : 4275) -# ifdef FMT_EXPORT -# define FMT_API __declspec(dllexport) -# elif defined(FMT_SHARED) -# define FMT_API __declspec(dllimport) -# endif -#else -# define FMT_CLASS_API -# if defined(FMT_EXPORT) || defined(FMT_SHARED) -# if defined(__GNUC__) || defined(__clang__) -# define FMT_API __attribute__((visibility("default"))) -# endif -# endif -#endif -#ifndef FMT_API -# define FMT_API -#endif - -// libc++ supports string_view in pre-c++17. -#if FMT_HAS_INCLUDE() && \ - (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) -# include -# define FMT_USE_STRING_VIEW -#elif FMT_HAS_INCLUDE("experimental/string_view") && FMT_CPLUSPLUS >= 201402L -# include -# define FMT_USE_EXPERIMENTAL_STRING_VIEW -#endif - -#ifndef FMT_UNICODE -# define FMT_UNICODE !FMT_MSC_VERSION -#endif - -#ifndef FMT_CONSTEVAL -# if ((FMT_GCC_VERSION >= 1000 || FMT_CLANG_VERSION >= 1101) && \ - FMT_CPLUSPLUS >= 202002L && !defined(__apple_build_version__)) || \ - (defined(__cpp_consteval) && \ - (!FMT_MSC_VERSION || _MSC_FULL_VER >= 193030704)) -// consteval is broken in MSVC before VS2022 and Apple clang 13. 
-# define FMT_CONSTEVAL consteval -# define FMT_HAS_CONSTEVAL -# else -# define FMT_CONSTEVAL -# endif -#endif - -#ifndef FMT_USE_NONTYPE_TEMPLATE_ARGS -# if defined(__cpp_nontype_template_args) && \ - ((FMT_GCC_VERSION >= 903 && FMT_CPLUSPLUS >= 201709L) || \ - __cpp_nontype_template_args >= 201911L) && \ - !defined(__NVCOMPILER) && !defined(__LCC__) -# define FMT_USE_NONTYPE_TEMPLATE_ARGS 1 -# else -# define FMT_USE_NONTYPE_TEMPLATE_ARGS 0 -# endif -#endif - -// Enable minimal optimizations for more compact code in debug mode. -FMT_GCC_PRAGMA("GCC push_options") -#if !defined(__OPTIMIZE__) && !defined(__NVCOMPILER) && !defined(__LCC__) -FMT_GCC_PRAGMA("GCC optimize(\"Og\")") -#endif - -FMT_BEGIN_NAMESPACE -FMT_MODULE_EXPORT_BEGIN - -// Implementations of enable_if_t and other metafunctions for older systems. -template -using enable_if_t = typename std::enable_if::type; -template -using conditional_t = typename std::conditional::type; -template using bool_constant = std::integral_constant; -template -using remove_reference_t = typename std::remove_reference::type; -template -using remove_const_t = typename std::remove_const::type; -template -using remove_cvref_t = typename std::remove_cv>::type; -template struct type_identity { using type = T; }; -template using type_identity_t = typename type_identity::type; -template -using underlying_t = typename std::underlying_type::type; - -template struct disjunction : std::false_type {}; -template struct disjunction

: P {}; -template -struct disjunction - : conditional_t> {}; - -template struct conjunction : std::true_type {}; -template struct conjunction

: P {}; -template -struct conjunction - : conditional_t, P1> {}; - -struct monostate { - constexpr monostate() {} -}; - -// An enable_if helper to be used in template parameters which results in much -// shorter symbols: https://godbolt.org/z/sWw4vP. Extra parentheses are needed -// to workaround a bug in MSVC 2019 (see #1140 and #1186). -#ifdef FMT_DOC -# define FMT_ENABLE_IF(...) -#else -# define FMT_ENABLE_IF(...) fmt::enable_if_t<(__VA_ARGS__), int> = 0 -#endif - -FMT_BEGIN_DETAIL_NAMESPACE - -// Suppresses "unused variable" warnings with the method described in -// https://herbsutter.com/2009/10/18/mailbag-shutting-up-compiler-warnings/. -// (void)var does not work on many Intel compilers. -template FMT_CONSTEXPR void ignore_unused(const T&...) {} - -constexpr FMT_INLINE auto is_constant_evaluated( - bool default_value = false) noexcept -> bool { -#ifdef __cpp_lib_is_constant_evaluated - ignore_unused(default_value); - return std::is_constant_evaluated(); -#else - return default_value; -#endif -} - -// Suppresses "conditional expression is constant" warnings. -template constexpr FMT_INLINE auto const_check(T value) -> T { - return value; -} - -FMT_NORETURN FMT_API void assert_fail(const char* file, int line, - const char* message); - -#ifndef FMT_ASSERT -# ifdef NDEBUG -// FMT_ASSERT is not empty to avoid -Wempty-body. -# define FMT_ASSERT(condition, message) \ - ::fmt::detail::ignore_unused((condition), (message)) -# else -# define FMT_ASSERT(condition, message) \ - ((condition) /* void() fails with -Winvalid-constexpr on clang 4.0.1 */ \ - ? (void)0 \ - : ::fmt::detail::assert_fail(__FILE__, __LINE__, (message))) -# endif -#endif - -#if defined(FMT_USE_STRING_VIEW) -template using std_string_view = std::basic_string_view; -#elif defined(FMT_USE_EXPERIMENTAL_STRING_VIEW) -template -using std_string_view = std::experimental::basic_string_view; -#else -template struct std_string_view {}; -#endif - -#ifdef FMT_USE_INT128 -// Do nothing. 
-#elif defined(__SIZEOF_INT128__) && !defined(__NVCC__) && \ - !(FMT_CLANG_VERSION && FMT_MSC_VERSION) -# define FMT_USE_INT128 1 -using int128_opt = __int128_t; // An optional native 128-bit integer. -using uint128_opt = __uint128_t; -template inline auto convert_for_visit(T value) -> T { - return value; -} -#else -# define FMT_USE_INT128 0 -#endif -#if !FMT_USE_INT128 -enum class int128_opt {}; -enum class uint128_opt {}; -// Reduce template instantiations. -template auto convert_for_visit(T) -> monostate { return {}; } -#endif - -// Casts a nonnegative integer to unsigned. -template -FMT_CONSTEXPR auto to_unsigned(Int value) -> - typename std::make_unsigned::type { - return static_cast::type>(value); -} - -FMT_MSC_WARNING(suppress : 4566) constexpr unsigned char micro[] = "\u00B5"; - -constexpr auto is_utf8() -> bool { - // Avoid buggy sign extensions in MSVC's constant evaluation mode (#2297). - using uchar = unsigned char; - return FMT_UNICODE || (sizeof(micro) == 3 && uchar(micro[0]) == 0xC2 && - uchar(micro[1]) == 0xB5); -} -FMT_END_DETAIL_NAMESPACE - -/** - An implementation of ``std::basic_string_view`` for pre-C++17. It provides a - subset of the API. ``fmt::basic_string_view`` is used for format strings even - if ``std::string_view`` is available to prevent issues when a library is - compiled with a different ``-std`` option than the client code (which is not - recommended). - */ -template class basic_string_view { - private: - const Char* data_; - size_t size_; - - public: - using value_type = Char; - using iterator = const Char*; - - constexpr basic_string_view() noexcept : data_(nullptr), size_(0) {} - - /** Constructs a string reference object from a C string and a size. */ - constexpr basic_string_view(const Char* s, size_t count) noexcept - : data_(s), size_(count) {} - - /** - \rst - Constructs a string reference object from a C string computing - the size with ``std::char_traits::length``. 
- \endrst - */ - FMT_CONSTEXPR_CHAR_TRAITS - FMT_INLINE - basic_string_view(const Char* s) - : data_(s), - size_(detail::const_check(std::is_same::value && - !detail::is_constant_evaluated(true)) - ? std::strlen(reinterpret_cast(s)) - : std::char_traits::length(s)) {} - - /** Constructs a string reference from a ``std::basic_string`` object. */ - template - FMT_CONSTEXPR basic_string_view( - const std::basic_string& s) noexcept - : data_(s.data()), size_(s.size()) {} - - template >::value)> - FMT_CONSTEXPR basic_string_view(S s) noexcept - : data_(s.data()), size_(s.size()) {} - - /** Returns a pointer to the string data. */ - constexpr auto data() const noexcept -> const Char* { return data_; } - - /** Returns the string size. */ - constexpr auto size() const noexcept -> size_t { return size_; } - - constexpr auto begin() const noexcept -> iterator { return data_; } - constexpr auto end() const noexcept -> iterator { return data_ + size_; } - - constexpr auto operator[](size_t pos) const noexcept -> const Char& { - return data_[pos]; - } - - FMT_CONSTEXPR void remove_prefix(size_t n) noexcept { - data_ += n; - size_ -= n; - } - - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with( - basic_string_view sv) const noexcept { - return size_ >= sv.size_ && - std::char_traits::compare(data_, sv.data_, sv.size_) == 0; - } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(Char c) const noexcept { - return size_ >= 1 && std::char_traits::eq(*data_, c); - } - FMT_CONSTEXPR_CHAR_TRAITS bool starts_with(const Char* s) const { - return starts_with(basic_string_view(s)); - } - - // Lexicographically compare this string reference to other. - FMT_CONSTEXPR_CHAR_TRAITS auto compare(basic_string_view other) const -> int { - size_t str_size = size_ < other.size_ ? size_ : other.size_; - int result = std::char_traits::compare(data_, other.data_, str_size); - if (result == 0) - result = size_ == other.size_ ? 0 : (size_ < other.size_ ? 
-1 : 1); - return result; - } - - FMT_CONSTEXPR_CHAR_TRAITS friend auto operator==(basic_string_view lhs, - basic_string_view rhs) - -> bool { - return lhs.compare(rhs) == 0; - } - friend auto operator!=(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) != 0; - } - friend auto operator<(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) < 0; - } - friend auto operator<=(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) <= 0; - } - friend auto operator>(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) > 0; - } - friend auto operator>=(basic_string_view lhs, basic_string_view rhs) -> bool { - return lhs.compare(rhs) >= 0; - } -}; - -using string_view = basic_string_view; - -/** Specifies if ``T`` is a character type. Can be specialized by users. */ -template struct is_char : std::false_type {}; -template <> struct is_char : std::true_type {}; - -FMT_BEGIN_DETAIL_NAMESPACE - -// A base class for compile-time strings. -struct compile_string {}; - -template -struct is_compile_string : std::is_base_of {}; - -// Returns a string view of `s`. -template ::value)> -FMT_INLINE auto to_string_view(const Char* s) -> basic_string_view { - return s; -} -template -inline auto to_string_view(const std::basic_string& s) - -> basic_string_view { - return s; -} -template -constexpr auto to_string_view(basic_string_view s) - -> basic_string_view { - return s; -} -template >::value)> -inline auto to_string_view(std_string_view s) -> basic_string_view { - return s; -} -template ::value)> -constexpr auto to_string_view(const S& s) - -> basic_string_view { - return basic_string_view(s); -} -void to_string_view(...); - -// Specifies whether S is a string type convertible to fmt::basic_string_view. -// It should be a constexpr function but MSVC 2017 fails to compile it in -// enable_if and MSVC 2015 fails to compile it as an alias template. 
-// ADL invocation of to_string_view is DEPRECATED! -template -struct is_string : std::is_class()))> { -}; - -template struct char_t_impl {}; -template struct char_t_impl::value>> { - using result = decltype(to_string_view(std::declval())); - using type = typename result::value_type; -}; - -enum class type { - none_type, - // Integer types should go first, - int_type, - uint_type, - long_long_type, - ulong_long_type, - int128_type, - uint128_type, - bool_type, - char_type, - last_integer_type = char_type, - // followed by floating-point types. - float_type, - double_type, - long_double_type, - last_numeric_type = long_double_type, - cstring_type, - string_type, - pointer_type, - custom_type -}; - -// Maps core type T to the corresponding type enum constant. -template -struct type_constant : std::integral_constant {}; - -#define FMT_TYPE_CONSTANT(Type, constant) \ - template \ - struct type_constant \ - : std::integral_constant {} - -FMT_TYPE_CONSTANT(int, int_type); -FMT_TYPE_CONSTANT(unsigned, uint_type); -FMT_TYPE_CONSTANT(long long, long_long_type); -FMT_TYPE_CONSTANT(unsigned long long, ulong_long_type); -FMT_TYPE_CONSTANT(int128_opt, int128_type); -FMT_TYPE_CONSTANT(uint128_opt, uint128_type); -FMT_TYPE_CONSTANT(bool, bool_type); -FMT_TYPE_CONSTANT(Char, char_type); -FMT_TYPE_CONSTANT(float, float_type); -FMT_TYPE_CONSTANT(double, double_type); -FMT_TYPE_CONSTANT(long double, long_double_type); -FMT_TYPE_CONSTANT(const Char*, cstring_type); -FMT_TYPE_CONSTANT(basic_string_view, string_type); -FMT_TYPE_CONSTANT(const void*, pointer_type); - -constexpr bool is_integral_type(type t) { - return t > type::none_type && t <= type::last_integer_type; -} - -constexpr bool is_arithmetic_type(type t) { - return t > type::none_type && t <= type::last_numeric_type; -} - -FMT_NORETURN FMT_API void throw_format_error(const char* message); - -struct error_handler { - constexpr error_handler() = default; - constexpr error_handler(const error_handler&) = default; - - // This 
function is intentionally not constexpr to give a compile-time error. - FMT_NORETURN void on_error(const char* message) { - throw_format_error(message); - } -}; -FMT_END_DETAIL_NAMESPACE - -/** String's character type. */ -template using char_t = typename detail::char_t_impl::type; - -/** - \rst - Parsing context consisting of a format string range being parsed and an - argument counter for automatic indexing. - You can use the ``format_parse_context`` type alias for ``char`` instead. - \endrst - */ -template -class basic_format_parse_context : private ErrorHandler { - private: - basic_string_view format_str_; - int next_arg_id_; - - FMT_CONSTEXPR void do_check_arg_id(int id); - - public: - using char_type = Char; - using iterator = typename basic_string_view::iterator; - - explicit constexpr basic_format_parse_context( - basic_string_view format_str, ErrorHandler eh = {}, - int next_arg_id = 0) - : ErrorHandler(eh), format_str_(format_str), next_arg_id_(next_arg_id) {} - - /** - Returns an iterator to the beginning of the format string range being - parsed. - */ - constexpr auto begin() const noexcept -> iterator { - return format_str_.begin(); - } - - /** - Returns an iterator past the end of the format string range being parsed. - */ - constexpr auto end() const noexcept -> iterator { return format_str_.end(); } - - /** Advances the begin iterator to ``it``. */ - FMT_CONSTEXPR void advance_to(iterator it) { - format_str_.remove_prefix(detail::to_unsigned(it - begin())); - } - - /** - Reports an error if using the manual argument indexing; otherwise returns - the next argument index and switches to the automatic indexing. - */ - FMT_CONSTEXPR auto next_arg_id() -> int { - if (next_arg_id_ < 0) { - on_error("cannot switch from manual to automatic argument indexing"); - return 0; - } - int id = next_arg_id_++; - do_check_arg_id(id); - return id; - } - - /** - Reports an error if using the automatic argument indexing; otherwise - switches to the manual indexing. 
- */ - FMT_CONSTEXPR void check_arg_id(int id) { - if (next_arg_id_ > 0) { - on_error("cannot switch from automatic to manual argument indexing"); - return; - } - next_arg_id_ = -1; - do_check_arg_id(id); - } - FMT_CONSTEXPR void check_arg_id(basic_string_view) {} - FMT_CONSTEXPR void check_dynamic_spec(int arg_id); - - FMT_CONSTEXPR void on_error(const char* message) { - ErrorHandler::on_error(message); - } - - constexpr auto error_handler() const -> ErrorHandler { return *this; } -}; - -using format_parse_context = basic_format_parse_context; - -FMT_BEGIN_DETAIL_NAMESPACE -// A parse context with extra data used only in compile-time checks. -template -class compile_parse_context - : public basic_format_parse_context { - private: - int num_args_; - const type* types_; - using base = basic_format_parse_context; - - public: - explicit FMT_CONSTEXPR compile_parse_context( - basic_string_view format_str, int num_args, const type* types, - ErrorHandler eh = {}, int next_arg_id = 0) - : base(format_str, eh, next_arg_id), num_args_(num_args), types_(types) {} - - constexpr auto num_args() const -> int { return num_args_; } - constexpr auto arg_type(int id) const -> type { return types_[id]; } - - FMT_CONSTEXPR auto next_arg_id() -> int { - int id = base::next_arg_id(); - if (id >= num_args_) this->on_error("argument not found"); - return id; - } - - FMT_CONSTEXPR void check_arg_id(int id) { - base::check_arg_id(id); - if (id >= num_args_) this->on_error("argument not found"); - } - using base::check_arg_id; - - FMT_CONSTEXPR void check_dynamic_spec(int arg_id) { - detail::ignore_unused(arg_id); -#if !defined(__LCC__) - if (arg_id < num_args_ && types_ && !is_integral_type(types_[arg_id])) - this->on_error("width/precision is not integer"); -#endif - } -}; -FMT_END_DETAIL_NAMESPACE - -template -FMT_CONSTEXPR void -basic_format_parse_context::do_check_arg_id(int id) { - // Argument id is only checked at compile-time during parsing because - // formatting has its own 
validation. - if (detail::is_constant_evaluated() && FMT_GCC_VERSION >= 1200) { - using context = detail::compile_parse_context; - if (id >= static_cast(this)->num_args()) - on_error("argument not found"); - } -} - -template -FMT_CONSTEXPR void -basic_format_parse_context::check_dynamic_spec(int arg_id) { - if (detail::is_constant_evaluated()) { - using context = detail::compile_parse_context; - static_cast(this)->check_dynamic_spec(arg_id); - } -} - -template class basic_format_arg; -template class basic_format_args; -template class dynamic_format_arg_store; - -// A formatter for objects of type T. -template -struct formatter { - // A deleted default constructor indicates a disabled formatter. - formatter() = delete; -}; - -// Specifies if T has an enabled formatter specialization. A type can be -// formattable even if it doesn't have a formatter e.g. via a conversion. -template -using has_formatter = - std::is_constructible>; - -// Checks whether T is a container with contiguous storage. -template struct is_contiguous : std::false_type {}; -template -struct is_contiguous> : std::true_type {}; - -class appender; - -FMT_BEGIN_DETAIL_NAMESPACE - -template -constexpr auto has_const_formatter_impl(T*) - -> decltype(typename Context::template formatter_type().format( - std::declval(), std::declval()), - true) { - return true; -} -template -constexpr auto has_const_formatter_impl(...) -> bool { - return false; -} -template -constexpr auto has_const_formatter() -> bool { - return has_const_formatter_impl(static_cast(nullptr)); -} - -// Extracts a reference to the container from back_insert_iterator. 
-template -inline auto get_container(std::back_insert_iterator it) - -> Container& { - using base = std::back_insert_iterator; - struct accessor : base { - accessor(base b) : base(b) {} - using base::container; - }; - return *accessor(it).container; -} - -template -FMT_CONSTEXPR auto copy_str(InputIt begin, InputIt end, OutputIt out) - -> OutputIt { - while (begin != end) *out++ = static_cast(*begin++); - return out; -} - -template , U>::value&& is_char::value)> -FMT_CONSTEXPR auto copy_str(T* begin, T* end, U* out) -> U* { - if (is_constant_evaluated()) return copy_str(begin, end, out); - auto size = to_unsigned(end - begin); - memcpy(out, begin, size * sizeof(U)); - return out + size; -} - -/** - \rst - A contiguous memory buffer with an optional growing ability. It is an internal - class and shouldn't be used directly, only via `~fmt::basic_memory_buffer`. - \endrst - */ -template class buffer { - private: - T* ptr_; - size_t size_; - size_t capacity_; - - protected: - // Don't initialize ptr_ since it is not accessed to save a few cycles. - FMT_MSC_WARNING(suppress : 26495) - buffer(size_t sz) noexcept : size_(sz), capacity_(sz) {} - - FMT_CONSTEXPR20 buffer(T* p = nullptr, size_t sz = 0, size_t cap = 0) noexcept - : ptr_(p), size_(sz), capacity_(cap) {} - - FMT_CONSTEXPR20 ~buffer() = default; - buffer(buffer&&) = default; - - /** Sets the buffer data and capacity. */ - FMT_CONSTEXPR void set(T* buf_data, size_t buf_capacity) noexcept { - ptr_ = buf_data; - capacity_ = buf_capacity; - } - - /** Increases the buffer capacity to hold at least *capacity* elements. 
*/ - virtual FMT_CONSTEXPR20 void grow(size_t capacity) = 0; - - public: - using value_type = T; - using const_reference = const T&; - - buffer(const buffer&) = delete; - void operator=(const buffer&) = delete; - - FMT_INLINE auto begin() noexcept -> T* { return ptr_; } - FMT_INLINE auto end() noexcept -> T* { return ptr_ + size_; } - - FMT_INLINE auto begin() const noexcept -> const T* { return ptr_; } - FMT_INLINE auto end() const noexcept -> const T* { return ptr_ + size_; } - - /** Returns the size of this buffer. */ - constexpr auto size() const noexcept -> size_t { return size_; } - - /** Returns the capacity of this buffer. */ - constexpr auto capacity() const noexcept -> size_t { return capacity_; } - - /** Returns a pointer to the buffer data. */ - FMT_CONSTEXPR auto data() noexcept -> T* { return ptr_; } - - /** Returns a pointer to the buffer data. */ - FMT_CONSTEXPR auto data() const noexcept -> const T* { return ptr_; } - - /** Clears this buffer. */ - void clear() { size_ = 0; } - - // Tries resizing the buffer to contain *count* elements. If T is a POD type - // the new elements may not be initialized. - FMT_CONSTEXPR20 void try_resize(size_t count) { - try_reserve(count); - size_ = count <= capacity_ ? count : capacity_; - } - - // Tries increasing the buffer capacity to *new_capacity*. It can increase the - // capacity by a smaller amount than requested but guarantees there is space - // for at least one additional element either by increasing the capacity or by - // flushing the buffer if it is full. - FMT_CONSTEXPR20 void try_reserve(size_t new_capacity) { - if (new_capacity > capacity_) grow(new_capacity); - } - - FMT_CONSTEXPR20 void push_back(const T& value) { - try_reserve(size_ + 1); - ptr_[size_++] = value; - } - - /** Appends data to the end of the buffer. 
*/ - template void append(const U* begin, const U* end); - - template FMT_CONSTEXPR auto operator[](Idx index) -> T& { - return ptr_[index]; - } - template - FMT_CONSTEXPR auto operator[](Idx index) const -> const T& { - return ptr_[index]; - } -}; - -struct buffer_traits { - explicit buffer_traits(size_t) {} - auto count() const -> size_t { return 0; } - auto limit(size_t size) -> size_t { return size; } -}; - -class fixed_buffer_traits { - private: - size_t count_ = 0; - size_t limit_; - - public: - explicit fixed_buffer_traits(size_t limit) : limit_(limit) {} - auto count() const -> size_t { return count_; } - auto limit(size_t size) -> size_t { - size_t n = limit_ > count_ ? limit_ - count_ : 0; - count_ += size; - return size < n ? size : n; - } -}; - -// A buffer that writes to an output iterator when flushed. -template -class iterator_buffer final : public Traits, public buffer { - private: - OutputIt out_; - enum { buffer_size = 256 }; - T data_[buffer_size]; - - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == buffer_size) flush(); - } - - void flush() { - auto size = this->size(); - this->clear(); - out_ = copy_str(data_, data_ + this->limit(size), out_); - } - - public: - explicit iterator_buffer(OutputIt out, size_t n = buffer_size) - : Traits(n), buffer(data_, 0, buffer_size), out_(out) {} - iterator_buffer(iterator_buffer&& other) - : Traits(other), buffer(data_, 0, buffer_size), out_(other.out_) {} - ~iterator_buffer() { flush(); } - - auto out() -> OutputIt { - flush(); - return out_; - } - auto count() const -> size_t { return Traits::count() + this->size(); } -}; - -template -class iterator_buffer final - : public fixed_buffer_traits, - public buffer { - private: - T* out_; - enum { buffer_size = 256 }; - T data_[buffer_size]; - - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() == this->capacity()) flush(); - } - - void flush() { - size_t n = this->limit(this->size()); - if (this->data() 
== out_) { - out_ += n; - this->set(data_, buffer_size); - } - this->clear(); - } - - public: - explicit iterator_buffer(T* out, size_t n = buffer_size) - : fixed_buffer_traits(n), buffer(out, 0, n), out_(out) {} - iterator_buffer(iterator_buffer&& other) - : fixed_buffer_traits(other), - buffer(std::move(other)), - out_(other.out_) { - if (this->data() != out_) { - this->set(data_, buffer_size); - this->clear(); - } - } - ~iterator_buffer() { flush(); } - - auto out() -> T* { - flush(); - return out_; - } - auto count() const -> size_t { - return fixed_buffer_traits::count() + this->size(); - } -}; - -template class iterator_buffer final : public buffer { - protected: - FMT_CONSTEXPR20 void grow(size_t) override {} - - public: - explicit iterator_buffer(T* out, size_t = 0) : buffer(out, 0, ~size_t()) {} - - auto out() -> T* { return &*this->end(); } -}; - -// A buffer that writes to a container with the contiguous storage. -template -class iterator_buffer, - enable_if_t::value, - typename Container::value_type>> - final : public buffer { - private: - Container& container_; - - protected: - FMT_CONSTEXPR20 void grow(size_t capacity) override { - container_.resize(capacity); - this->set(&container_[0], capacity); - } - - public: - explicit iterator_buffer(Container& c) - : buffer(c.size()), container_(c) {} - explicit iterator_buffer(std::back_insert_iterator out, size_t = 0) - : iterator_buffer(get_container(out)) {} - - auto out() -> std::back_insert_iterator { - return std::back_inserter(container_); - } -}; - -// A buffer that counts the number of code units written discarding the output. 
-template class counting_buffer final : public buffer { - private: - enum { buffer_size = 256 }; - T data_[buffer_size]; - size_t count_ = 0; - - protected: - FMT_CONSTEXPR20 void grow(size_t) override { - if (this->size() != buffer_size) return; - count_ += this->size(); - this->clear(); - } - - public: - counting_buffer() : buffer(data_, 0, buffer_size) {} - - auto count() -> size_t { return count_ + this->size(); } -}; - -template -using buffer_appender = conditional_t::value, appender, - std::back_insert_iterator>>; - -// Maps an output iterator to a buffer. -template -auto get_buffer(OutputIt out) -> iterator_buffer { - return iterator_buffer(out); -} -template , Buf>::value)> -auto get_buffer(std::back_insert_iterator out) -> buffer& { - return get_container(out); -} - -template -FMT_INLINE auto get_iterator(Buf& buf, OutputIt) -> decltype(buf.out()) { - return buf.out(); -} -template -auto get_iterator(buffer&, OutputIt out) -> OutputIt { - return out; -} - -template -struct fallback_formatter { - fallback_formatter() = delete; -}; - -// Specifies if T has an enabled fallback_formatter specialization. -template -using has_fallback_formatter = -#ifdef FMT_DEPRECATED_OSTREAM - std::is_constructible>; -#else - std::false_type; -#endif - -struct view {}; - -template struct named_arg : view { - const Char* name; - const T& value; - named_arg(const Char* n, const T& v) : name(n), value(v) {} -}; - -template struct named_arg_info { - const Char* name; - int id; -}; - -template -struct arg_data { - // args_[0].named_args points to named_args_ to avoid bloating format_args. - // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. - T args_[1 + (NUM_ARGS != 0 ? NUM_ARGS : +1)]; - named_arg_info named_args_[NUM_NAMED_ARGS]; - - template - arg_data(const U&... 
init) : args_{T(named_args_, NUM_NAMED_ARGS), init...} {} - arg_data(const arg_data& other) = delete; - auto args() const -> const T* { return args_ + 1; } - auto named_args() -> named_arg_info* { return named_args_; } -}; - -template -struct arg_data { - // +1 to workaround a bug in gcc 7.5 that causes duplicated-branches warning. - T args_[NUM_ARGS != 0 ? NUM_ARGS : +1]; - - template - FMT_CONSTEXPR FMT_INLINE arg_data(const U&... init) : args_{init...} {} - FMT_CONSTEXPR FMT_INLINE auto args() const -> const T* { return args_; } - FMT_CONSTEXPR FMT_INLINE auto named_args() -> std::nullptr_t { - return nullptr; - } -}; - -template -inline void init_named_args(named_arg_info*, int, int) {} - -template struct is_named_arg : std::false_type {}; -template struct is_statically_named_arg : std::false_type {}; - -template -struct is_named_arg> : std::true_type {}; - -template ::value)> -void init_named_args(named_arg_info* named_args, int arg_count, - int named_arg_count, const T&, const Tail&... args) { - init_named_args(named_args, arg_count + 1, named_arg_count, args...); -} - -template ::value)> -void init_named_args(named_arg_info* named_args, int arg_count, - int named_arg_count, const T& arg, const Tail&... args) { - named_args[named_arg_count++] = {arg.name, arg_count}; - init_named_args(named_args, arg_count + 1, named_arg_count, args...); -} - -template -FMT_CONSTEXPR FMT_INLINE void init_named_args(std::nullptr_t, int, int, - const Args&...) {} - -template constexpr auto count() -> size_t { return B ? 1 : 0; } -template constexpr auto count() -> size_t { - return (B1 ? 
1 : 0) + count(); -} - -template constexpr auto count_named_args() -> size_t { - return count::value...>(); -} - -template -constexpr auto count_statically_named_args() -> size_t { - return count::value...>(); -} - -struct unformattable {}; -struct unformattable_char : unformattable {}; -struct unformattable_const : unformattable {}; -struct unformattable_pointer : unformattable {}; - -template struct string_value { - const Char* data; - size_t size; -}; - -template struct named_arg_value { - const named_arg_info* data; - size_t size; -}; - -template struct custom_value { - using parse_context = typename Context::parse_context_type; - void* value; - void (*format)(void* arg, parse_context& parse_ctx, Context& ctx); -}; - -// A formatting argument value. -template class value { - public: - using char_type = typename Context::char_type; - - union { - monostate no_value; - int int_value; - unsigned uint_value; - long long long_long_value; - unsigned long long ulong_long_value; - int128_opt int128_value; - uint128_opt uint128_value; - bool bool_value; - char_type char_value; - float float_value; - double double_value; - long double long_double_value; - const void* pointer; - string_value string; - custom_value custom; - named_arg_value named_args; - }; - - constexpr FMT_INLINE value() : no_value() {} - constexpr FMT_INLINE value(int val) : int_value(val) {} - constexpr FMT_INLINE value(unsigned val) : uint_value(val) {} - constexpr FMT_INLINE value(long long val) : long_long_value(val) {} - constexpr FMT_INLINE value(unsigned long long val) : ulong_long_value(val) {} - FMT_INLINE value(int128_opt val) : int128_value(val) {} - FMT_INLINE value(uint128_opt val) : uint128_value(val) {} - constexpr FMT_INLINE value(float val) : float_value(val) {} - constexpr FMT_INLINE value(double val) : double_value(val) {} - FMT_INLINE value(long double val) : long_double_value(val) {} - constexpr FMT_INLINE value(bool val) : bool_value(val) {} - constexpr FMT_INLINE value(char_type 
val) : char_value(val) {} - FMT_CONSTEXPR FMT_INLINE value(const char_type* val) { - string.data = val; - if (is_constant_evaluated()) string.size = {}; - } - FMT_CONSTEXPR FMT_INLINE value(basic_string_view val) { - string.data = val.data(); - string.size = val.size(); - } - FMT_INLINE value(const void* val) : pointer(val) {} - FMT_INLINE value(const named_arg_info* args, size_t size) - : named_args{args, size} {} - - template FMT_CONSTEXPR FMT_INLINE value(T& val) { - using value_type = remove_cvref_t; - custom.value = const_cast(&val); - // Get the formatter type through the context to allow different contexts - // have different extension points, e.g. `formatter` for `format` and - // `printf_formatter` for `printf`. - custom.format = format_custom_arg< - value_type, - conditional_t::value, - typename Context::template formatter_type, - fallback_formatter>>; - } - value(unformattable); - value(unformattable_char); - value(unformattable_const); - value(unformattable_pointer); - - private: - // Formats an argument of a custom type, such as a user-defined class. - template - static void format_custom_arg(void* arg, - typename Context::parse_context_type& parse_ctx, - Context& ctx) { - auto f = Formatter(); - parse_ctx.advance_to(f.parse(parse_ctx)); - using qualified_type = - conditional_t(), const T, T>; - ctx.advance_to(f.format(*static_cast(arg), ctx)); - } -}; - -template -FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg; - -// To minimize the number of types we need to deal with, long is translated -// either to int or to long long depending on its size. -enum { long_short = sizeof(long) == sizeof(int) }; -using long_type = conditional_t; -using ulong_type = conditional_t; - -#ifdef __cpp_lib_byte -inline auto format_as(std::byte b) -> unsigned char { - return static_cast(b); -} -#endif - -template struct has_format_as { - template ::value&& std::is_integral::value)> - static auto check(U*) -> std::true_type; - static auto check(...) 
-> std::false_type; - - enum { value = decltype(check(static_cast(nullptr)))::value }; -}; - -// Maps formatting arguments to core types. -// arg_mapper reports errors by returning unformattable instead of using -// static_assert because it's used in the is_formattable trait. -template struct arg_mapper { - using char_type = typename Context::char_type; - - FMT_CONSTEXPR FMT_INLINE auto map(signed char val) -> int { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned char val) -> unsigned { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(short val) -> int { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned short val) -> unsigned { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(int val) -> int { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned val) -> unsigned { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(long val) -> long_type { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned long val) -> ulong_type { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(long long val) -> long long { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(unsigned long long val) - -> unsigned long long { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(int128_opt val) -> int128_opt { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(uint128_opt val) -> uint128_opt { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(bool val) -> bool { return val; } - - template ::value || - std::is_same::value)> - FMT_CONSTEXPR FMT_INLINE auto map(T val) -> char_type { - return val; - } - template ::value || -#ifdef __cpp_char8_t - std::is_same::value || -#endif - std::is_same::value || - std::is_same::value) && - !std::is_same::value, - int> = 0> - FMT_CONSTEXPR FMT_INLINE auto map(T) -> unformattable_char { - return {}; - } - - FMT_CONSTEXPR FMT_INLINE auto map(float val) -> float { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(double val) -> double { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(long double val) -> long 
double { - return val; - } - - FMT_CONSTEXPR FMT_INLINE auto map(char_type* val) -> const char_type* { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(const char_type* val) -> const char_type* { - return val; - } - template ::value && !std::is_pointer::value && - std::is_same>::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> basic_string_view { - return to_string_view(val); - } - template ::value && !std::is_pointer::value && - !std::is_same>::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T&) -> unformattable_char { - return {}; - } - template >::value && - !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> basic_string_view { - return basic_string_view(val); - } - template >::value && - !std::is_convertible>::value && - !is_string::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> basic_string_view { - return std_string_view(val); - } - - FMT_CONSTEXPR FMT_INLINE auto map(void* val) -> const void* { return val; } - FMT_CONSTEXPR FMT_INLINE auto map(const void* val) -> const void* { - return val; - } - FMT_CONSTEXPR FMT_INLINE auto map(std::nullptr_t val) -> const void* { - return val; - } - - // We use SFINAE instead of a const T* parameter to avoid conflicting with - // the C array overload. 
- template < - typename T, - FMT_ENABLE_IF( - std::is_pointer::value || std::is_member_pointer::value || - std::is_function::type>::value || - (std::is_convertible::value && - !std::is_convertible::value && - !has_formatter::value))> - FMT_CONSTEXPR auto map(const T&) -> unformattable_pointer { - return {}; - } - - template ::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T (&values)[N]) -> const T (&)[N] { - return values; - } - - template ::value&& std::is_convertible::value && - !has_format_as::value && !has_formatter::value && - !has_fallback_formatter::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> decltype(std::declval().map( - static_cast>(val))) { - return map(static_cast>(val)); - } - - template ::value && - !has_formatter::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& val) - -> decltype(std::declval().map(format_as(T()))) { - return map(format_as(val)); - } - - template > - struct formattable - : bool_constant() || - !std::is_const>::value || - has_fallback_formatter::value> {}; - -#if (FMT_MSC_VERSION != 0 && FMT_MSC_VERSION < 1910) || \ - FMT_ICC_VERSION != 0 || defined(__NVCC__) - // Workaround a bug in MSVC and Intel (Issue 2746). 
- template FMT_CONSTEXPR FMT_INLINE auto do_map(T&& val) -> T& { - return val; - } -#else - template ::value)> - FMT_CONSTEXPR FMT_INLINE auto do_map(T&& val) -> T& { - return val; - } - template ::value)> - FMT_CONSTEXPR FMT_INLINE auto do_map(T&&) -> unformattable_const { - return {}; - } -#endif - - template , - FMT_ENABLE_IF(!is_string::value && !is_char::value && - !std::is_array::value && - !std::is_pointer::value && - !has_format_as::value && - (has_formatter::value || - has_fallback_formatter::value))> - FMT_CONSTEXPR FMT_INLINE auto map(T&& val) - -> decltype(this->do_map(std::forward(val))) { - return do_map(std::forward(val)); - } - - template ::value)> - FMT_CONSTEXPR FMT_INLINE auto map(const T& named_arg) - -> decltype(std::declval().map(named_arg.value)) { - return map(named_arg.value); - } - - auto map(...) -> unformattable { return {}; } -}; - -// A type constant after applying arg_mapper. -template -using mapped_type_constant = - type_constant().map(std::declval())), - typename Context::char_type>; - -enum { packed_arg_bits = 4 }; -// Maximum number of arguments with packed types. -enum { max_packed_args = 62 / packed_arg_bits }; -enum : unsigned long long { is_unpacked_bit = 1ULL << 63 }; -enum : unsigned long long { has_named_args_bit = 1ULL << 62 }; - -FMT_END_DETAIL_NAMESPACE - -// An output iterator that appends to a buffer. -// It is used to reduce symbol sizes for the common case. -class appender : public std::back_insert_iterator> { - using base = std::back_insert_iterator>; - - public: - using std::back_insert_iterator>::back_insert_iterator; - appender(base it) noexcept : base(it) {} - FMT_UNCHECKED_ITERATOR(appender); - - auto operator++() noexcept -> appender& { return *this; } - auto operator++(int) noexcept -> appender { return *this; } -}; - -// A formatting argument. It is a trivially copyable/constructible type to -// allow storage in basic_memory_buffer. 
-template class basic_format_arg { - private: - detail::value value_; - detail::type type_; - - template - friend FMT_CONSTEXPR auto detail::make_arg(T&& value) - -> basic_format_arg; - - template - friend FMT_CONSTEXPR auto visit_format_arg(Visitor&& vis, - const basic_format_arg& arg) - -> decltype(vis(0)); - - friend class basic_format_args; - friend class dynamic_format_arg_store; - - using char_type = typename Context::char_type; - - template - friend struct detail::arg_data; - - basic_format_arg(const detail::named_arg_info* args, size_t size) - : value_(args, size) {} - - public: - class handle { - public: - explicit handle(detail::custom_value custom) : custom_(custom) {} - - void format(typename Context::parse_context_type& parse_ctx, - Context& ctx) const { - custom_.format(custom_.value, parse_ctx, ctx); - } - - private: - detail::custom_value custom_; - }; - - constexpr basic_format_arg() : type_(detail::type::none_type) {} - - constexpr explicit operator bool() const noexcept { - return type_ != detail::type::none_type; - } - - auto type() const -> detail::type { return type_; } - - auto is_integral() const -> bool { return detail::is_integral_type(type_); } - auto is_arithmetic() const -> bool { - return detail::is_arithmetic_type(type_); - } -}; - -/** - \rst - Visits an argument dispatching to the appropriate visit method based on - the argument type. For example, if the argument type is ``double`` then - ``vis(value)`` will be called with the value of type ``double``. 
- \endrst - */ -#if FMT_ICC_VERSION != 0 -#pragma warning(disable : 1595) -#endif -template -FMT_CONSTEXPR FMT_INLINE auto visit_format_arg( - Visitor&& vis, const basic_format_arg& arg) -> decltype(vis(0)) { - switch (arg.type_) { - case detail::type::none_type: - break; - case detail::type::int_type: - return vis(arg.value_.int_value); - case detail::type::uint_type: - return vis(arg.value_.uint_value); - case detail::type::long_long_type: - return vis(arg.value_.long_long_value); - case detail::type::ulong_long_type: - return vis(arg.value_.ulong_long_value); - case detail::type::int128_type: - return vis(detail::convert_for_visit(arg.value_.int128_value)); - case detail::type::uint128_type: - return vis(detail::convert_for_visit(arg.value_.uint128_value)); - case detail::type::bool_type: - return vis(arg.value_.bool_value); - case detail::type::char_type: - return vis(arg.value_.char_value); - case detail::type::float_type: - return vis(arg.value_.float_value); - case detail::type::double_type: - return vis(arg.value_.double_value); - case detail::type::long_double_type: - return vis(arg.value_.long_double_value); - case detail::type::cstring_type: - return vis(arg.value_.string.data); - case detail::type::string_type: - using sv = basic_string_view; - return vis(sv(arg.value_.string.data, arg.value_.string.size)); - case detail::type::pointer_type: - return vis(arg.value_.pointer); - case detail::type::custom_type: - return vis(typename basic_format_arg::handle(arg.value_.custom)); - } - return vis(monostate()); -} - -FMT_BEGIN_DETAIL_NAMESPACE - -template -auto copy_str(InputIt begin, InputIt end, appender out) -> appender { - get_container(out).append(begin, end); - return out; -} - -template -FMT_CONSTEXPR auto copy_str(R&& rng, OutputIt out) -> OutputIt { - return detail::copy_str(rng.begin(), rng.end(), out); -} - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 500 -// A workaround for gcc 4.8 to make void_t work in a SFINAE context. 
-template struct void_t_impl { using type = void; }; -template -using void_t = typename detail::void_t_impl::type; -#else -template using void_t = void; -#endif - -template -struct is_output_iterator : std::false_type {}; - -template -struct is_output_iterator< - It, T, - void_t::iterator_category, - decltype(*std::declval() = std::declval())>> - : std::true_type {}; - -template -struct is_back_insert_iterator : std::false_type {}; -template -struct is_back_insert_iterator> - : std::true_type {}; - -template -struct is_contiguous_back_insert_iterator : std::false_type {}; -template -struct is_contiguous_back_insert_iterator> - : is_contiguous {}; -template <> -struct is_contiguous_back_insert_iterator : std::true_type {}; - -// A type-erased reference to an std::locale to avoid a heavy include. -class locale_ref { - private: - const void* locale_; // A type-erased pointer to std::locale. - - public: - constexpr FMT_INLINE locale_ref() : locale_(nullptr) {} - template explicit locale_ref(const Locale& loc); - - explicit operator bool() const noexcept { return locale_ != nullptr; } - - template auto get() const -> Locale; -}; - -template constexpr auto encode_types() -> unsigned long long { - return 0; -} - -template -constexpr auto encode_types() -> unsigned long long { - return static_cast(mapped_type_constant::value) | - (encode_types() << packed_arg_bits); -} - -template -FMT_CONSTEXPR FMT_INLINE auto make_value(T&& val) -> value { - const auto& arg = arg_mapper().map(FMT_FORWARD(val)); - - constexpr bool formattable_char = - !std::is_same::value; - static_assert(formattable_char, "Mixing character types is disallowed."); - - constexpr bool formattable_const = - !std::is_same::value; - static_assert(formattable_const, "Cannot format a const argument."); - - // Formatting of arbitrary pointers is disallowed. If you want to output - // a pointer cast it to "void *" or "const void *". 
In particular, this - // forbids formatting of "[const] volatile char *" which is printed as bool - // by iostreams. - constexpr bool formattable_pointer = - !std::is_same::value; - static_assert(formattable_pointer, - "Formatting of non-void pointers is disallowed."); - - constexpr bool formattable = - !std::is_same::value; - static_assert( - formattable, - "Cannot format an argument. To make type T formattable provide a " - "formatter specialization: https://fmt.dev/latest/api.html#udt"); - return {arg}; -} - -template -FMT_CONSTEXPR auto make_arg(T&& value) -> basic_format_arg { - basic_format_arg arg; - arg.type_ = mapped_type_constant::value; - arg.value_ = make_value(value); - return arg; -} - -// The type template parameter is there to avoid an ODR violation when using -// a fallback formatter in one translation unit and an implicit conversion in -// another (not recommended). -template -FMT_CONSTEXPR FMT_INLINE auto make_arg(T&& val) -> value { - return make_value(val); -} - -template -FMT_CONSTEXPR inline auto make_arg(T&& value) -> basic_format_arg { - return make_arg(value); -} -FMT_END_DETAIL_NAMESPACE - -// Formatting context. -template class basic_format_context { - public: - /** The character type for the output. */ - using char_type = Char; - - private: - OutputIt out_; - basic_format_args args_; - detail::locale_ref loc_; - - public: - using iterator = OutputIt; - using format_arg = basic_format_arg; - using parse_context_type = basic_format_parse_context; - template using formatter_type = formatter; - - basic_format_context(basic_format_context&&) = default; - basic_format_context(const basic_format_context&) = delete; - void operator=(const basic_format_context&) = delete; - /** - Constructs a ``basic_format_context`` object. References to the arguments are - stored in the object so make sure they have appropriate lifetimes. 
- */ - constexpr basic_format_context( - OutputIt out, basic_format_args ctx_args, - detail::locale_ref loc = detail::locale_ref()) - : out_(out), args_(ctx_args), loc_(loc) {} - - constexpr auto arg(int id) const -> format_arg { return args_.get(id); } - FMT_CONSTEXPR auto arg(basic_string_view name) -> format_arg { - return args_.get(name); - } - FMT_CONSTEXPR auto arg_id(basic_string_view name) -> int { - return args_.get_id(name); - } - auto args() const -> const basic_format_args& { - return args_; - } - - FMT_CONSTEXPR auto error_handler() -> detail::error_handler { return {}; } - void on_error(const char* message) { error_handler().on_error(message); } - - // Returns an iterator to the beginning of the output range. - FMT_CONSTEXPR auto out() -> iterator { return out_; } - - // Advances the begin iterator to ``it``. - void advance_to(iterator it) { - if (!detail::is_back_insert_iterator()) out_ = it; - } - - FMT_CONSTEXPR auto locale() -> detail::locale_ref { return loc_; } -}; - -template -using buffer_context = - basic_format_context, Char>; -using format_context = buffer_context; - -// Workaround an alias issue: https://stackoverflow.com/q/62767544/471164. -#define FMT_BUFFER_CONTEXT(Char) \ - basic_format_context, Char> - -template -using is_formattable = bool_constant< - !std::is_base_of>().map( - std::declval()))>::value && - !detail::has_fallback_formatter::value>; - -/** - \rst - An array of references to arguments. It can be implicitly converted into - `~fmt::basic_format_args` for passing into type-erased formatting functions - such as `~fmt::vformat`. - \endrst - */ -template -class format_arg_store -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - // Workaround a GCC template argument substitution bug. 
- : public basic_format_args -#endif -{ - private: - static const size_t num_args = sizeof...(Args); - static const size_t num_named_args = detail::count_named_args(); - static const bool is_packed = num_args <= detail::max_packed_args; - - using value_type = conditional_t, - basic_format_arg>; - - detail::arg_data - data_; - - friend class basic_format_args; - - static constexpr unsigned long long desc = - (is_packed ? detail::encode_types() - : detail::is_unpacked_bit | num_args) | - (num_named_args != 0 - ? static_cast(detail::has_named_args_bit) - : 0); - - public: - template - FMT_CONSTEXPR FMT_INLINE format_arg_store(T&&... args) - : -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 - basic_format_args(*this), -#endif - data_{detail::make_arg< - is_packed, Context, - detail::mapped_type_constant, Context>::value>( - FMT_FORWARD(args))...} { - detail::init_named_args(data_.named_args(), 0, 0, args...); - } -}; - -/** - \rst - Constructs a `~fmt::format_arg_store` object that contains references to - arguments and can be implicitly converted to `~fmt::format_args`. `Context` - can be omitted in which case it defaults to `~fmt::context`. - See `~fmt::arg` for lifetime considerations. - \endrst - */ -template -constexpr auto make_format_args(Args&&... args) - -> format_arg_store...> { - return {FMT_FORWARD(args)...}; -} - -/** - \rst - Returns a named argument to be used in a formatting function. - It should only be used in a call to a formatting function or - `dynamic_format_arg_store::push_back`. - - **Example**:: - - fmt::print("Elapsed time: {s:.2f} seconds", fmt::arg("s", 1.23)); - \endrst - */ -template -inline auto arg(const Char* name, const T& arg) -> detail::named_arg { - static_assert(!detail::is_named_arg(), "nested named arguments"); - return {name, arg}; -} - -/** - \rst - A view of a collection of formatting arguments. 
To avoid lifetime issues it - should only be used as a parameter type in type-erased functions such as - ``vformat``:: - - void vlog(string_view format_str, format_args args); // OK - format_args args = make_format_args(42); // Error: dangling reference - \endrst - */ -template class basic_format_args { - public: - using size_type = int; - using format_arg = basic_format_arg; - - private: - // A descriptor that contains information about formatting arguments. - // If the number of arguments is less or equal to max_packed_args then - // argument types are passed in the descriptor. This reduces binary code size - // per formatting function call. - unsigned long long desc_; - union { - // If is_packed() returns true then argument values are stored in values_; - // otherwise they are stored in args_. This is done to improve cache - // locality and reduce compiled code size since storing larger objects - // may require more code (at least on x86-64) even if the same amount of - // data is actually copied to stack. It saves ~10% on the bloat test. - const detail::value* values_; - const format_arg* args_; - }; - - constexpr auto is_packed() const -> bool { - return (desc_ & detail::is_unpacked_bit) == 0; - } - auto has_named_args() const -> bool { - return (desc_ & detail::has_named_args_bit) != 0; - } - - FMT_CONSTEXPR auto type(int index) const -> detail::type { - int shift = index * detail::packed_arg_bits; - unsigned int mask = (1 << detail::packed_arg_bits) - 1; - return static_cast((desc_ >> shift) & mask); - } - - constexpr FMT_INLINE basic_format_args(unsigned long long desc, - const detail::value* values) - : desc_(desc), values_(values) {} - constexpr basic_format_args(unsigned long long desc, const format_arg* args) - : desc_(desc), args_(args) {} - - public: - constexpr basic_format_args() : desc_(0), args_(nullptr) {} - - /** - \rst - Constructs a `basic_format_args` object from `~fmt::format_arg_store`. 
- \endrst - */ - template - constexpr FMT_INLINE basic_format_args( - const format_arg_store& store) - : basic_format_args(format_arg_store::desc, - store.data_.args()) {} - - /** - \rst - Constructs a `basic_format_args` object from - `~fmt::dynamic_format_arg_store`. - \endrst - */ - constexpr FMT_INLINE basic_format_args( - const dynamic_format_arg_store& store) - : basic_format_args(store.get_types(), store.data()) {} - - /** - \rst - Constructs a `basic_format_args` object from a dynamic set of arguments. - \endrst - */ - constexpr basic_format_args(const format_arg* args, int count) - : basic_format_args(detail::is_unpacked_bit | detail::to_unsigned(count), - args) {} - - /** Returns the argument with the specified id. */ - FMT_CONSTEXPR auto get(int id) const -> format_arg { - format_arg arg; - if (!is_packed()) { - if (id < max_size()) arg = args_[id]; - return arg; - } - if (id >= detail::max_packed_args) return arg; - arg.type_ = type(id); - if (arg.type_ == detail::type::none_type) return arg; - arg.value_ = values_[id]; - return arg; - } - - template - auto get(basic_string_view name) const -> format_arg { - int id = get_id(name); - return id >= 0 ? get(id) : format_arg(); - } - - template - auto get_id(basic_string_view name) const -> int { - if (!has_named_args()) return -1; - const auto& named_args = - (is_packed() ? values_[-1] : args_[-1].value_).named_args; - for (size_t i = 0; i < named_args.size; ++i) { - if (named_args.data[i].name == name) return named_args.data[i].id; - } - return -1; - } - - auto max_size() const -> int { - unsigned long long max_packed = detail::max_packed_args; - return static_cast(is_packed() ? max_packed - : desc_ & ~detail::is_unpacked_bit); - } -}; - -/** An alias to ``basic_format_args``. */ -// A separate type would result in shorter symbols but break ABI compatibility -// between clang and gcc on ARM (#1919). 
-using format_args = basic_format_args; - -// We cannot use enum classes as bit fields because of a gcc bug, so we put them -// in namespaces instead (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=61414). -// Additionally, if an underlying type is specified, older gcc incorrectly warns -// that the type is too small. Both bugs are fixed in gcc 9.3. -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 903 -# define FMT_ENUM_UNDERLYING_TYPE(type) -#else -# define FMT_ENUM_UNDERLYING_TYPE(type) : type -#endif -namespace align { -enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, left, right, center, - numeric}; -} -using align_t = align::type; -namespace sign { -enum type FMT_ENUM_UNDERLYING_TYPE(unsigned char){none, minus, plus, space}; -} -using sign_t = sign::type; - -FMT_BEGIN_DETAIL_NAMESPACE - -// Workaround an array initialization issue in gcc 4.8. -template struct fill_t { - private: - enum { max_size = 4 }; - Char data_[max_size] = {Char(' '), Char(0), Char(0), Char(0)}; - unsigned char size_ = 1; - - public: - FMT_CONSTEXPR void operator=(basic_string_view s) { - auto size = s.size(); - if (size > max_size) return throw_format_error("invalid fill"); - for (size_t i = 0; i < size; ++i) data_[i] = s[i]; - size_ = static_cast(size); - } - - constexpr auto size() const -> size_t { return size_; } - constexpr auto data() const -> const Char* { return data_; } - - FMT_CONSTEXPR auto operator[](size_t index) -> Char& { return data_[index]; } - FMT_CONSTEXPR auto operator[](size_t index) const -> const Char& { - return data_[index]; - } -}; -FMT_END_DETAIL_NAMESPACE - -enum class presentation_type : unsigned char { - none, - // Integer types should go first, - dec, // 'd' - oct, // 'o' - hex_lower, // 'x' - hex_upper, // 'X' - bin_lower, // 'b' - bin_upper, // 'B' - hexfloat_lower, // 'a' - hexfloat_upper, // 'A' - exp_lower, // 'e' - exp_upper, // 'E' - fixed_lower, // 'f' - fixed_upper, // 'F' - general_lower, // 'g' - general_upper, // 'G' - chr, // 'c' - string, // 's' 
- pointer, // 'p' - debug // '?' -}; - -// Format specifiers for built-in and string types. -template struct basic_format_specs { - int width; - int precision; - presentation_type type; - align_t align : 4; - sign_t sign : 3; - bool alt : 1; // Alternate form ('#'). - bool localized : 1; - detail::fill_t fill; - - constexpr basic_format_specs() - : width(0), - precision(-1), - type(presentation_type::none), - align(align::none), - sign(sign::none), - alt(false), - localized(false) {} -}; - -using format_specs = basic_format_specs; - -FMT_BEGIN_DETAIL_NAMESPACE - -enum class arg_id_kind { none, index, name }; - -// An argument reference. -template struct arg_ref { - FMT_CONSTEXPR arg_ref() : kind(arg_id_kind::none), val() {} - - FMT_CONSTEXPR explicit arg_ref(int index) - : kind(arg_id_kind::index), val(index) {} - FMT_CONSTEXPR explicit arg_ref(basic_string_view name) - : kind(arg_id_kind::name), val(name) {} - - FMT_CONSTEXPR auto operator=(int idx) -> arg_ref& { - kind = arg_id_kind::index; - val.index = idx; - return *this; - } - - arg_id_kind kind; - union value { - FMT_CONSTEXPR value(int id = 0) : index{id} {} - FMT_CONSTEXPR value(basic_string_view n) : name(n) {} - - int index; - basic_string_view name; - } val; -}; - -// Format specifiers with width and precision resolved at formatting rather -// than parsing time to allow re-using the same parsed specifiers with -// different sets of arguments (precompilation of format strings). -template -struct dynamic_format_specs : basic_format_specs { - arg_ref width_ref; - arg_ref precision_ref; -}; - -struct auto_id {}; - -// A format specifier handler that sets fields in basic_format_specs. 
-template class specs_setter { - protected: - basic_format_specs& specs_; - - public: - explicit FMT_CONSTEXPR specs_setter(basic_format_specs& specs) - : specs_(specs) {} - - FMT_CONSTEXPR specs_setter(const specs_setter& other) - : specs_(other.specs_) {} - - FMT_CONSTEXPR void on_align(align_t align) { specs_.align = align; } - FMT_CONSTEXPR void on_fill(basic_string_view fill) { - specs_.fill = fill; - } - FMT_CONSTEXPR void on_sign(sign_t s) { specs_.sign = s; } - FMT_CONSTEXPR void on_hash() { specs_.alt = true; } - FMT_CONSTEXPR void on_localized() { specs_.localized = true; } - - FMT_CONSTEXPR void on_zero() { - if (specs_.align == align::none) specs_.align = align::numeric; - specs_.fill[0] = Char('0'); - } - - FMT_CONSTEXPR void on_width(int width) { specs_.width = width; } - FMT_CONSTEXPR void on_precision(int precision) { - specs_.precision = precision; - } - FMT_CONSTEXPR void end_precision() {} - - FMT_CONSTEXPR void on_type(presentation_type type) { specs_.type = type; } -}; - -// Format spec handler that saves references to arguments representing dynamic -// width and precision to be resolved at formatting time. 
-template -class dynamic_specs_handler - : public specs_setter { - public: - using char_type = typename ParseContext::char_type; - - FMT_CONSTEXPR dynamic_specs_handler(dynamic_format_specs& specs, - ParseContext& ctx) - : specs_setter(specs), specs_(specs), context_(ctx) {} - - FMT_CONSTEXPR dynamic_specs_handler(const dynamic_specs_handler& other) - : specs_setter(other), - specs_(other.specs_), - context_(other.context_) {} - - template FMT_CONSTEXPR void on_dynamic_width(Id arg_id) { - specs_.width_ref = make_arg_ref(arg_id); - } - - template FMT_CONSTEXPR void on_dynamic_precision(Id arg_id) { - specs_.precision_ref = make_arg_ref(arg_id); - } - - FMT_CONSTEXPR void on_error(const char* message) { - context_.on_error(message); - } - - private: - dynamic_format_specs& specs_; - ParseContext& context_; - - using arg_ref_type = arg_ref; - - FMT_CONSTEXPR auto make_arg_ref(int arg_id) -> arg_ref_type { - context_.check_arg_id(arg_id); - context_.check_dynamic_spec(arg_id); - return arg_ref_type(arg_id); - } - - FMT_CONSTEXPR auto make_arg_ref(auto_id) -> arg_ref_type { - int arg_id = context_.next_arg_id(); - context_.check_dynamic_spec(arg_id); - return arg_ref_type(arg_id); - } - - FMT_CONSTEXPR auto make_arg_ref(basic_string_view arg_id) - -> arg_ref_type { - context_.check_arg_id(arg_id); - basic_string_view format_str( - context_.begin(), to_unsigned(context_.end() - context_.begin())); - return arg_ref_type(arg_id); - } -}; - -template constexpr bool is_ascii_letter(Char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); -} - -// Converts a character to ASCII. Returns a number > 127 on conversion failure. 
-template ::value)> -constexpr auto to_ascii(Char c) -> Char { - return c; -} -template ::value)> -constexpr auto to_ascii(Char c) -> underlying_t { - return c; -} - -FMT_CONSTEXPR inline auto code_point_length_impl(char c) -> int { - return "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0\0\0\2\2\2\2\3\3\4" - [static_cast(c) >> 3]; -} - -template -FMT_CONSTEXPR auto code_point_length(const Char* begin) -> int { - if (const_check(sizeof(Char) != 1)) return 1; - int len = code_point_length_impl(static_cast(*begin)); - - // Compute the pointer to the next character early so that the next - // iteration can start working on the next character. Neither Clang - // nor GCC figure out this reordering on their own. - return len + !len; -} - -// Return the result via the out param to workaround gcc bug 77539. -template -FMT_CONSTEXPR auto find(Ptr first, Ptr last, T value, Ptr& out) -> bool { - for (out = first; out != last; ++out) { - if (*out == value) return true; - } - return false; -} - -template <> -inline auto find(const char* first, const char* last, char value, - const char*& out) -> bool { - out = static_cast( - std::memchr(first, value, to_unsigned(last - first))); - return out != nullptr; -} - -// Parses the range [begin, end) as an unsigned integer. This function assumes -// that the range is non-empty and the first character is a digit. -template -FMT_CONSTEXPR auto parse_nonnegative_int(const Char*& begin, const Char* end, - int error_value) noexcept -> int { - FMT_ASSERT(begin != end && '0' <= *begin && *begin <= '9', ""); - unsigned value = 0, prev = 0; - auto p = begin; - do { - prev = value; - value = value * 10 + unsigned(*p - '0'); - ++p; - } while (p != end && '0' <= *p && *p <= '9'); - auto num_digits = p - begin; - begin = p; - if (num_digits <= std::numeric_limits::digits10) - return static_cast(value); - // Check for overflow. 
- const unsigned max = to_unsigned((std::numeric_limits::max)()); - return num_digits == std::numeric_limits::digits10 + 1 && - prev * 10ull + unsigned(p[-1] - '0') <= max - ? static_cast(value) - : error_value; -} - -// Parses fill and alignment. -template -FMT_CONSTEXPR auto parse_align(const Char* begin, const Char* end, - Handler&& handler) -> const Char* { - FMT_ASSERT(begin != end, ""); - auto align = align::none; - auto p = begin + code_point_length(begin); - if (end - p <= 0) p = begin; - for (;;) { - switch (to_ascii(*p)) { - case '<': - align = align::left; - break; - case '>': - align = align::right; - break; - case '^': - align = align::center; - break; - default: - break; - } - if (align != align::none) { - if (p != begin) { - auto c = *begin; - if (c == '{') - return handler.on_error("invalid fill character '{'"), begin; - if (c == '}') return begin; - handler.on_fill(basic_string_view(begin, to_unsigned(p - begin))); - begin = p + 1; - } else - ++begin; - handler.on_align(align); - break; - } else if (p == begin) { - break; - } - p = begin; - } - return begin; -} - -template FMT_CONSTEXPR bool is_name_start(Char c) { - return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || '_' == c; -} - -template -FMT_CONSTEXPR auto do_parse_arg_id(const Char* begin, const Char* end, - IDHandler&& handler) -> const Char* { - FMT_ASSERT(begin != end, ""); - Char c = *begin; - if (c >= '0' && c <= '9') { - int index = 0; - if (c != '0') - index = - parse_nonnegative_int(begin, end, (std::numeric_limits::max)()); - else - ++begin; - if (begin == end || (*begin != '}' && *begin != ':')) - handler.on_error("invalid format string"); - else - handler(index); - return begin; - } - if (!is_name_start(c)) { - handler.on_error("invalid format string"); - return begin; - } - auto it = begin; - do { - ++it; - } while (it != end && (is_name_start(c = *it) || ('0' <= c && c <= '9'))); - handler(basic_string_view(begin, to_unsigned(it - begin))); - return it; -} - -template 
-FMT_CONSTEXPR FMT_INLINE auto parse_arg_id(const Char* begin, const Char* end, - IDHandler&& handler) -> const Char* { - Char c = *begin; - if (c != '}' && c != ':') return do_parse_arg_id(begin, end, handler); - handler(); - return begin; -} - -template -FMT_CONSTEXPR auto parse_width(const Char* begin, const Char* end, - Handler&& handler) -> const Char* { - using detail::auto_id; - struct width_adapter { - Handler& handler; - - FMT_CONSTEXPR void operator()() { handler.on_dynamic_width(auto_id()); } - FMT_CONSTEXPR void operator()(int id) { handler.on_dynamic_width(id); } - FMT_CONSTEXPR void operator()(basic_string_view id) { - handler.on_dynamic_width(id); - } - FMT_CONSTEXPR void on_error(const char* message) { - if (message) handler.on_error(message); - } - }; - - FMT_ASSERT(begin != end, ""); - if ('0' <= *begin && *begin <= '9') { - int width = parse_nonnegative_int(begin, end, -1); - if (width != -1) - handler.on_width(width); - else - handler.on_error("number is too big"); - } else if (*begin == '{') { - ++begin; - if (begin != end) begin = parse_arg_id(begin, end, width_adapter{handler}); - if (begin == end || *begin != '}') - return handler.on_error("invalid format string"), begin; - ++begin; - } - return begin; -} - -template -FMT_CONSTEXPR auto parse_precision(const Char* begin, const Char* end, - Handler&& handler) -> const Char* { - using detail::auto_id; - struct precision_adapter { - Handler& handler; - - FMT_CONSTEXPR void operator()() { handler.on_dynamic_precision(auto_id()); } - FMT_CONSTEXPR void operator()(int id) { handler.on_dynamic_precision(id); } - FMT_CONSTEXPR void operator()(basic_string_view id) { - handler.on_dynamic_precision(id); - } - FMT_CONSTEXPR void on_error(const char* message) { - if (message) handler.on_error(message); - } - }; - - ++begin; - auto c = begin != end ? 
*begin : Char(); - if ('0' <= c && c <= '9') { - auto precision = parse_nonnegative_int(begin, end, -1); - if (precision != -1) - handler.on_precision(precision); - else - handler.on_error("number is too big"); - } else if (c == '{') { - ++begin; - if (begin != end) - begin = parse_arg_id(begin, end, precision_adapter{handler}); - if (begin == end || *begin++ != '}') - return handler.on_error("invalid format string"), begin; - } else { - return handler.on_error("missing precision specifier"), begin; - } - handler.end_precision(); - return begin; -} - -template -FMT_CONSTEXPR auto parse_presentation_type(Char type) -> presentation_type { - switch (to_ascii(type)) { - case 'd': - return presentation_type::dec; - case 'o': - return presentation_type::oct; - case 'x': - return presentation_type::hex_lower; - case 'X': - return presentation_type::hex_upper; - case 'b': - return presentation_type::bin_lower; - case 'B': - return presentation_type::bin_upper; - case 'a': - return presentation_type::hexfloat_lower; - case 'A': - return presentation_type::hexfloat_upper; - case 'e': - return presentation_type::exp_lower; - case 'E': - return presentation_type::exp_upper; - case 'f': - return presentation_type::fixed_lower; - case 'F': - return presentation_type::fixed_upper; - case 'g': - return presentation_type::general_lower; - case 'G': - return presentation_type::general_upper; - case 'c': - return presentation_type::chr; - case 's': - return presentation_type::string; - case 'p': - return presentation_type::pointer; - case '?': - return presentation_type::debug; - default: - return presentation_type::none; - } -} - -// Parses standard format specifiers and sends notifications about parsed -// components to handler. 
-template -FMT_CONSTEXPR FMT_INLINE auto parse_format_specs(const Char* begin, - const Char* end, - SpecHandler&& handler) - -> const Char* { - if (1 < end - begin && begin[1] == '}' && is_ascii_letter(*begin) && - *begin != 'L') { - presentation_type type = parse_presentation_type(*begin++); - if (type == presentation_type::none) - handler.on_error("invalid type specifier"); - handler.on_type(type); - return begin; - } - - if (begin == end) return begin; - - begin = parse_align(begin, end, handler); - if (begin == end) return begin; - - // Parse sign. - switch (to_ascii(*begin)) { - case '+': - handler.on_sign(sign::plus); - ++begin; - break; - case '-': - handler.on_sign(sign::minus); - ++begin; - break; - case ' ': - handler.on_sign(sign::space); - ++begin; - break; - default: - break; - } - if (begin == end) return begin; - - if (*begin == '#') { - handler.on_hash(); - if (++begin == end) return begin; - } - - // Parse zero flag. - if (*begin == '0') { - handler.on_zero(); - if (++begin == end) return begin; - } - - begin = parse_width(begin, end, handler); - if (begin == end) return begin; - - // Parse precision. - if (*begin == '.') { - begin = parse_precision(begin, end, handler); - if (begin == end) return begin; - } - - if (*begin == 'L') { - handler.on_localized(); - ++begin; - } - - // Parse type. 
- if (begin != end && *begin != '}') { - presentation_type type = parse_presentation_type(*begin++); - if (type == presentation_type::none) - handler.on_error("invalid type specifier"); - handler.on_type(type); - } - return begin; -} - -template -FMT_CONSTEXPR auto parse_replacement_field(const Char* begin, const Char* end, - Handler&& handler) -> const Char* { - struct id_adapter { - Handler& handler; - int arg_id; - - FMT_CONSTEXPR void operator()() { arg_id = handler.on_arg_id(); } - FMT_CONSTEXPR void operator()(int id) { arg_id = handler.on_arg_id(id); } - FMT_CONSTEXPR void operator()(basic_string_view id) { - arg_id = handler.on_arg_id(id); - } - FMT_CONSTEXPR void on_error(const char* message) { - if (message) handler.on_error(message); - } - }; - - ++begin; - if (begin == end) return handler.on_error("invalid format string"), end; - if (*begin == '}') { - handler.on_replacement_field(handler.on_arg_id(), begin); - } else if (*begin == '{') { - handler.on_text(begin, begin + 1); - } else { - auto adapter = id_adapter{handler, 0}; - begin = parse_arg_id(begin, end, adapter); - Char c = begin != end ? *begin : Char(); - if (c == '}') { - handler.on_replacement_field(adapter.arg_id, begin); - } else if (c == ':') { - begin = handler.on_format_specs(adapter.arg_id, begin + 1, end); - if (begin == end || *begin != '}') - return handler.on_error("unknown format specifier"), end; - } else { - return handler.on_error("missing '}' in format string"), end; - } - } - return begin + 1; -} - -template -FMT_CONSTEXPR FMT_INLINE void parse_format_string( - basic_string_view format_str, Handler&& handler) { - // Workaround a name-lookup bug in MSVC's modules implementation. - using detail::find; - - auto begin = format_str.data(); - auto end = begin + format_str.size(); - if (end - begin < 32) { - // Use a simple loop instead of memchr for small strings. 
- const Char* p = begin; - while (p != end) { - auto c = *p++; - if (c == '{') { - handler.on_text(begin, p - 1); - begin = p = parse_replacement_field(p - 1, end, handler); - } else if (c == '}') { - if (p == end || *p != '}') - return handler.on_error("unmatched '}' in format string"); - handler.on_text(begin, p); - begin = ++p; - } - } - handler.on_text(begin, end); - return; - } - struct writer { - FMT_CONSTEXPR void operator()(const Char* from, const Char* to) { - if (from == to) return; - for (;;) { - const Char* p = nullptr; - if (!find(from, to, Char('}'), p)) - return handler_.on_text(from, to); - ++p; - if (p == to || *p != '}') - return handler_.on_error("unmatched '}' in format string"); - handler_.on_text(from, p); - from = p + 1; - } - } - Handler& handler_; - } write = {handler}; - while (begin != end) { - // Doing two passes with memchr (one for '{' and another for '}') is up to - // 2.5x faster than the naive one-pass implementation on big format strings. - const Char* p = begin; - if (*begin != '{' && !find(begin + 1, end, Char('{'), p)) - return write(begin, end); - write(begin, p); - begin = parse_replacement_field(p, end, handler); - } -} - -template ::value> struct strip_named_arg { - using type = T; -}; -template struct strip_named_arg { - using type = remove_cvref_t; -}; - -template -FMT_CONSTEXPR auto parse_format_specs(ParseContext& ctx) - -> decltype(ctx.begin()) { - using char_type = typename ParseContext::char_type; - using context = buffer_context; - using stripped_type = typename strip_named_arg::type; - using mapped_type = conditional_t< - mapped_type_constant::value != type::custom_type, - decltype(arg_mapper().map(std::declval())), - stripped_type>; - auto f = conditional_t::value, - formatter, - fallback_formatter>(); - return f.parse(ctx); -} - -template -FMT_CONSTEXPR void check_int_type_spec(presentation_type type, - ErrorHandler&& eh) { - if (type > presentation_type::bin_upper && type != presentation_type::chr) - 
eh.on_error("invalid type specifier"); -} - -// Checks char specs and returns true if the type spec is char (and not int). -template -FMT_CONSTEXPR auto check_char_specs(const basic_format_specs& specs, - ErrorHandler&& eh = {}) -> bool { - if (specs.type != presentation_type::none && - specs.type != presentation_type::chr && - specs.type != presentation_type::debug) { - check_int_type_spec(specs.type, eh); - return false; - } - if (specs.align == align::numeric || specs.sign != sign::none || specs.alt) - eh.on_error("invalid format specifier for char"); - return true; -} - -// A floating-point presentation format. -enum class float_format : unsigned char { - general, // General: exponent notation or fixed point based on magnitude. - exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. - fixed, // Fixed point with the default precision of 6, e.g. 0.0012. - hex -}; - -struct float_specs { - int precision; - float_format format : 8; - sign_t sign : 8; - bool upper : 1; - bool locale : 1; - bool binary32 : 1; - bool showpoint : 1; -}; - -template -FMT_CONSTEXPR auto parse_float_type_spec(const basic_format_specs& specs, - ErrorHandler&& eh = {}) - -> float_specs { - auto result = float_specs(); - result.showpoint = specs.alt; - result.locale = specs.localized; - switch (specs.type) { - case presentation_type::none: - result.format = float_format::general; - break; - case presentation_type::general_upper: - result.upper = true; - FMT_FALLTHROUGH; - case presentation_type::general_lower: - result.format = float_format::general; - break; - case presentation_type::exp_upper: - result.upper = true; - FMT_FALLTHROUGH; - case presentation_type::exp_lower: - result.format = float_format::exp; - result.showpoint |= specs.precision != 0; - break; - case presentation_type::fixed_upper: - result.upper = true; - FMT_FALLTHROUGH; - case presentation_type::fixed_lower: - result.format = float_format::fixed; - result.showpoint |= specs.precision != 0; - break; - 
case presentation_type::hexfloat_upper: - result.upper = true; - FMT_FALLTHROUGH; - case presentation_type::hexfloat_lower: - result.format = float_format::hex; - break; - default: - eh.on_error("invalid type specifier"); - break; - } - return result; -} - -template -FMT_CONSTEXPR auto check_cstring_type_spec(presentation_type type, - ErrorHandler&& eh = {}) -> bool { - if (type == presentation_type::none || type == presentation_type::string || - type == presentation_type::debug) - return true; - if (type != presentation_type::pointer) eh.on_error("invalid type specifier"); - return false; -} - -template -FMT_CONSTEXPR void check_string_type_spec(presentation_type type, - ErrorHandler&& eh = {}) { - if (type != presentation_type::none && type != presentation_type::string && - type != presentation_type::debug) - eh.on_error("invalid type specifier"); -} - -template -FMT_CONSTEXPR void check_pointer_type_spec(presentation_type type, - ErrorHandler&& eh) { - if (type != presentation_type::none && type != presentation_type::pointer) - eh.on_error("invalid type specifier"); -} - -// A parse_format_specs handler that checks if specifiers are consistent with -// the argument type. 
-template class specs_checker : public Handler { - private: - detail::type arg_type_; - - FMT_CONSTEXPR void require_numeric_argument() { - if (!is_arithmetic_type(arg_type_)) - this->on_error("format specifier requires numeric argument"); - } - - public: - FMT_CONSTEXPR specs_checker(const Handler& handler, detail::type arg_type) - : Handler(handler), arg_type_(arg_type) {} - - FMT_CONSTEXPR void on_align(align_t align) { - if (align == align::numeric) require_numeric_argument(); - Handler::on_align(align); - } - - FMT_CONSTEXPR void on_sign(sign_t s) { - require_numeric_argument(); - if (is_integral_type(arg_type_) && arg_type_ != type::int_type && - arg_type_ != type::long_long_type && arg_type_ != type::int128_type && - arg_type_ != type::char_type) { - this->on_error("format specifier requires signed argument"); - } - Handler::on_sign(s); - } - - FMT_CONSTEXPR void on_hash() { - require_numeric_argument(); - Handler::on_hash(); - } - - FMT_CONSTEXPR void on_localized() { - require_numeric_argument(); - Handler::on_localized(); - } - - FMT_CONSTEXPR void on_zero() { - require_numeric_argument(); - Handler::on_zero(); - } - - FMT_CONSTEXPR void end_precision() { - if (is_integral_type(arg_type_) || arg_type_ == type::pointer_type) - this->on_error("precision not allowed for this argument type"); - } -}; - -constexpr int invalid_arg_index = -1; - -#if FMT_USE_NONTYPE_TEMPLATE_ARGS -template -constexpr auto get_arg_index_by_name(basic_string_view name) -> int { - if constexpr (detail::is_statically_named_arg()) { - if (name == T::name) return N; - } - if constexpr (sizeof...(Args) > 0) - return get_arg_index_by_name(name); - (void)name; // Workaround an MSVC bug about "unused" parameter. 
- return invalid_arg_index; -} -#endif - -template -FMT_CONSTEXPR auto get_arg_index_by_name(basic_string_view name) -> int { -#if FMT_USE_NONTYPE_TEMPLATE_ARGS - if constexpr (sizeof...(Args) > 0) - return get_arg_index_by_name<0, Args...>(name); -#endif - (void)name; - return invalid_arg_index; -} - -template -class format_string_checker { - private: - // In the future basic_format_parse_context will replace compile_parse_context - // here and will use is_constant_evaluated and downcasting to access the data - // needed for compile-time checks: https://godbolt.org/z/GvWzcTjh1. - using parse_context_type = compile_parse_context; - static constexpr int num_args = sizeof...(Args); - - // Format specifier parsing function. - using parse_func = const Char* (*)(parse_context_type&); - - parse_context_type context_; - parse_func parse_funcs_[num_args > 0 ? static_cast(num_args) : 1]; - type types_[num_args > 0 ? static_cast(num_args) : 1]; - - public: - explicit FMT_CONSTEXPR format_string_checker( - basic_string_view format_str, ErrorHandler eh) - : context_(format_str, num_args, types_, eh), - parse_funcs_{&parse_format_specs...}, - types_{ - mapped_type_constant>::value...} { - } - - FMT_CONSTEXPR void on_text(const Char*, const Char*) {} - - FMT_CONSTEXPR auto on_arg_id() -> int { return context_.next_arg_id(); } - FMT_CONSTEXPR auto on_arg_id(int id) -> int { - return context_.check_arg_id(id), id; - } - FMT_CONSTEXPR auto on_arg_id(basic_string_view id) -> int { -#if FMT_USE_NONTYPE_TEMPLATE_ARGS - auto index = get_arg_index_by_name(id); - if (index == invalid_arg_index) on_error("named argument is not found"); - return context_.check_arg_id(index), index; -#else - (void)id; - on_error("compile-time checks for named arguments require C++20 support"); - return 0; -#endif - } - - FMT_CONSTEXPR void on_replacement_field(int, const Char*) {} - - FMT_CONSTEXPR auto on_format_specs(int id, const Char* begin, const Char*) - -> const Char* { - 
context_.advance_to(context_.begin() + (begin - &*context_.begin())); - // id >= 0 check is a workaround for gcc 10 bug (#2065). - return id >= 0 && id < num_args ? parse_funcs_[id](context_) : begin; - } - - FMT_CONSTEXPR void on_error(const char* message) { - context_.on_error(message); - } -}; - -// Reports a compile-time error if S is not a valid format string. -template ::value)> -FMT_INLINE void check_format_string(const S&) { -#ifdef FMT_ENFORCE_COMPILE_STRING - static_assert(is_compile_string::value, - "FMT_ENFORCE_COMPILE_STRING requires all format strings to use " - "FMT_STRING."); -#endif -} -template ::value)> -void check_format_string(S format_str) { - FMT_CONSTEXPR auto s = basic_string_view(format_str); - using checker = format_string_checker...>; - FMT_CONSTEXPR bool invalid_format = - (parse_format_string(s, checker(s, {})), true); - ignore_unused(invalid_format); -} - -// Don't use type_identity for args to simplify symbols. -template -void vformat_to(buffer& buf, basic_string_view fmt, - basic_format_args args, - locale_ref loc = {}); - -FMT_API void vprint_mojibake(std::FILE*, string_view, format_args); -#ifndef _WIN32 -inline void vprint_mojibake(std::FILE*, string_view, format_args) {} -#endif -FMT_END_DETAIL_NAMESPACE - -// A formatter specialization for the core types corresponding to detail::type -// constants. -template -struct formatter::value != - detail::type::custom_type>> { - private: - detail::dynamic_format_specs specs_; - - public: - // Parses format specifiers stopping either at the end of the range or at the - // terminating '}'. 
- template - FMT_CONSTEXPR auto parse(ParseContext& ctx) -> decltype(ctx.begin()) { - auto begin = ctx.begin(), end = ctx.end(); - if (begin == end) return begin; - using handler_type = detail::dynamic_specs_handler; - auto type = detail::type_constant::value; - auto checker = - detail::specs_checker(handler_type(specs_, ctx), type); - auto it = detail::parse_format_specs(begin, end, checker); - auto eh = ctx.error_handler(); - switch (type) { - case detail::type::none_type: - FMT_ASSERT(false, "invalid argument type"); - break; - case detail::type::bool_type: - if (specs_.type == presentation_type::none || - specs_.type == presentation_type::string) { - break; - } - FMT_FALLTHROUGH; - case detail::type::int_type: - case detail::type::uint_type: - case detail::type::long_long_type: - case detail::type::ulong_long_type: - case detail::type::int128_type: - case detail::type::uint128_type: - detail::check_int_type_spec(specs_.type, eh); - break; - case detail::type::char_type: - detail::check_char_specs(specs_, eh); - break; - case detail::type::float_type: - if (detail::const_check(FMT_USE_FLOAT)) - detail::parse_float_type_spec(specs_, eh); - else - FMT_ASSERT(false, "float support disabled"); - break; - case detail::type::double_type: - if (detail::const_check(FMT_USE_DOUBLE)) - detail::parse_float_type_spec(specs_, eh); - else - FMT_ASSERT(false, "double support disabled"); - break; - case detail::type::long_double_type: - if (detail::const_check(FMT_USE_LONG_DOUBLE)) - detail::parse_float_type_spec(specs_, eh); - else - FMT_ASSERT(false, "long double support disabled"); - break; - case detail::type::cstring_type: - detail::check_cstring_type_spec(specs_.type, eh); - break; - case detail::type::string_type: - detail::check_string_type_spec(specs_.type, eh); - break; - case detail::type::pointer_type: - detail::check_pointer_type_spec(specs_.type, eh); - break; - case detail::type::custom_type: - // Custom format specifiers are checked in parse functions of - // 
formatter specializations. - break; - } - return it; - } - - template ::value, - enable_if_t<(U == detail::type::string_type || - U == detail::type::cstring_type || - U == detail::type::char_type), - int> = 0> - FMT_CONSTEXPR void set_debug_format() { - specs_.type = presentation_type::debug; - } - - template - FMT_CONSTEXPR auto format(const T& val, FormatContext& ctx) const - -> decltype(ctx.out()); -}; - -#define FMT_FORMAT_AS(Type, Base) \ - template \ - struct formatter : formatter { \ - template \ - auto format(Type const& val, FormatContext& ctx) const \ - -> decltype(ctx.out()) { \ - return formatter::format(static_cast(val), ctx); \ - } \ - } - -FMT_FORMAT_AS(signed char, int); -FMT_FORMAT_AS(unsigned char, unsigned); -FMT_FORMAT_AS(short, int); -FMT_FORMAT_AS(unsigned short, unsigned); -FMT_FORMAT_AS(long, long long); -FMT_FORMAT_AS(unsigned long, unsigned long long); -FMT_FORMAT_AS(Char*, const Char*); -FMT_FORMAT_AS(std::basic_string, basic_string_view); -FMT_FORMAT_AS(std::nullptr_t, const void*); -FMT_FORMAT_AS(detail::std_string_view, basic_string_view); - -template struct basic_runtime { basic_string_view str; }; - -/** A compile-time format string. 
*/ -template class basic_format_string { - private: - basic_string_view str_; - - public: - template >::value)> - FMT_CONSTEVAL FMT_INLINE basic_format_string(const S& s) : str_(s) { - static_assert( - detail::count< - (std::is_base_of>::value && - std::is_reference::value)...>() == 0, - "passing views as lvalues is disallowed"); -#ifdef FMT_HAS_CONSTEVAL - if constexpr (detail::count_named_args() == - detail::count_statically_named_args()) { - using checker = detail::format_string_checker...>; - detail::parse_format_string(str_, checker(s, {})); - } -#else - detail::check_format_string(s); -#endif - } - basic_format_string(basic_runtime r) : str_(r.str) {} - - FMT_INLINE operator basic_string_view() const { return str_; } - FMT_INLINE basic_string_view get() const { return str_; } -}; - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 -// Workaround broken conversion on older gcc. -template using format_string = string_view; -inline auto runtime(string_view s) -> string_view { return s; } -#else -template -using format_string = basic_format_string...>; -/** - \rst - Creates a runtime format string. - - **Example**:: - - // Check format string at runtime instead of compile-time. - fmt::print(fmt::runtime("{:d}"), "I am not a number"); - \endrst - */ -inline auto runtime(string_view s) -> basic_runtime { return {{s}}; } -#endif - -FMT_API auto vformat(string_view fmt, format_args args) -> std::string; - -/** - \rst - Formats ``args`` according to specifications in ``fmt`` and returns the result - as a string. - - **Example**:: - - #include - std::string message = fmt::format("The answer is {}.", 42); - \endrst -*/ -template -FMT_NODISCARD FMT_INLINE auto format(format_string fmt, T&&... args) - -> std::string { - return vformat(fmt, fmt::make_format_args(args...)); -} - -/** Formats a string and writes the output to ``out``. 
*/ -template ::value)> -auto vformat_to(OutputIt out, string_view fmt, format_args args) -> OutputIt { - auto&& buf = detail::get_buffer(out); - detail::vformat_to(buf, fmt, args, {}); - return detail::get_iterator(buf, out); -} - -/** - \rst - Formats ``args`` according to specifications in ``fmt``, writes the result to - the output iterator ``out`` and returns the iterator past the end of the output - range. `format_to` does not append a terminating null character. - - **Example**:: - - auto out = std::vector(); - fmt::format_to(std::back_inserter(out), "{}", 42); - \endrst - */ -template ::value)> -FMT_INLINE auto format_to(OutputIt out, format_string fmt, T&&... args) - -> OutputIt { - return vformat_to(out, fmt, fmt::make_format_args(args...)); -} - -template struct format_to_n_result { - /** Iterator past the end of the output range. */ - OutputIt out; - /** Total (not truncated) output size. */ - size_t size; -}; - -template ::value)> -auto vformat_to_n(OutputIt out, size_t n, string_view fmt, format_args args) - -> format_to_n_result { - using traits = detail::fixed_buffer_traits; - auto buf = detail::iterator_buffer(out, n); - detail::vformat_to(buf, fmt, args, {}); - return {buf.out(), buf.count()}; -} - -/** - \rst - Formats ``args`` according to specifications in ``fmt``, writes up to ``n`` - characters of the result to the output iterator ``out`` and returns the total - (not truncated) output size and the iterator past the end of the output range. - `format_to_n` does not append a terminating null character. - \endrst - */ -template ::value)> -FMT_INLINE auto format_to_n(OutputIt out, size_t n, format_string fmt, - T&&... args) -> format_to_n_result { - return vformat_to_n(out, n, fmt, fmt::make_format_args(args...)); -} - -/** Returns the number of chars in the output of ``format(fmt, args...)``. */ -template -FMT_NODISCARD FMT_INLINE auto formatted_size(format_string fmt, - T&&... 
args) -> size_t { - auto buf = detail::counting_buffer<>(); - detail::vformat_to(buf, string_view(fmt), - format_args(fmt::make_format_args(args...)), {}); - return buf.count(); -} - -FMT_API void vprint(string_view fmt, format_args args); -FMT_API void vprint(std::FILE* f, string_view fmt, format_args args); - -/** - \rst - Formats ``args`` according to specifications in ``fmt`` and writes the output - to ``stdout``. - - **Example**:: - - fmt::print("Elapsed time: {0:.2f} seconds", 1.23); - \endrst - */ -template -FMT_INLINE void print(format_string fmt, T&&... args) { - const auto& vargs = fmt::make_format_args(args...); - return detail::is_utf8() ? vprint(fmt, vargs) - : detail::vprint_mojibake(stdout, fmt, vargs); -} - -/** - \rst - Formats ``args`` according to specifications in ``fmt`` and writes the - output to the file ``f``. - - **Example**:: - - fmt::print(stderr, "Don't {}!", "panic"); - \endrst - */ -template -FMT_INLINE void print(std::FILE* f, format_string fmt, T&&... args) { - const auto& vargs = fmt::make_format_args(args...); - return detail::is_utf8() ? 
vprint(f, fmt, vargs) - : detail::vprint_mojibake(f, fmt, vargs); -} - -FMT_MODULE_EXPORT_END -FMT_GCC_PRAGMA("GCC pop_options") -FMT_END_NAMESPACE - -#ifdef FMT_HEADER_ONLY -# include "format.h" -#endif -#endif // FMT_CORE_H_ +#include "format.h" diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format-inl.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format-inl.h index 9ac55e47f22f..a887483b6f46 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format-inl.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format-inl.h @@ -8,21 +8,19 @@ #ifndef FMT_FORMAT_INL_H_ #define FMT_FORMAT_INL_H_ -#include -#include -#include // errno -#include -#include -#include -#include // std::memmove -#include -#include - -#ifndef FMT_STATIC_THOUSANDS_SEPARATOR -# include +#ifndef FMT_MODULE +# include +# include // errno +# include +# include +# include + +# if !defined(FMT_STATIC_THOUSANDS_SEPARATOR) +# include +# endif #endif -#ifdef _WIN32 +#if defined(_WIN32) && !defined(FMT_USE_WRITE_CONSOLE) # include // _isatty #endif @@ -40,10 +38,6 @@ FMT_FUNC void assert_fail(const char* file, int line, const char* message) { std::terminate(); } -FMT_FUNC void throw_format_error(const char* message) { - FMT_THROW(format_error(message)); -} - FMT_FUNC void format_error_code(detail::buffer& out, int error_code, string_view message) noexcept { // Report error code making sure that the output fits into @@ -60,10 +54,10 @@ FMT_FUNC void format_error_code(detail::buffer& out, int error_code, ++error_code_size; } error_code_size += detail::to_unsigned(detail::count_digits(abs_value)); - auto it = buffer_appender(out); + auto it = appender(out); if (message.size() <= inline_buffer_size - error_code_size) - format_to(it, FMT_STRING("{}{}"), message, SEP); - format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); + fmt::format_to(it, FMT_STRING("{}{}"), message, SEP); + fmt::format_to(it, FMT_STRING("{}{}"), ERROR_STR, error_code); 
FMT_ASSERT(out.size() <= inline_buffer_size, ""); } @@ -77,13 +71,10 @@ FMT_FUNC void report_error(format_func func, int error_code, } // A wrapper around fwrite that throws on error. -inline void fwrite_fully(const void* ptr, size_t size, size_t count, - FILE* stream) { - size_t written = std::fwrite(ptr, size, count, stream); -#if !__NVCC__ +inline void fwrite_fully(const void* ptr, size_t count, FILE* stream) { + size_t written = std::fwrite(ptr, 1, count, stream); if (written < count) FMT_THROW(system_error(errno, FMT_STRING("cannot write to file"))); -#endif } #ifndef FMT_STATIC_THOUSANDS_SEPARATOR @@ -92,7 +83,7 @@ locale_ref::locale_ref(const Locale& loc) : locale_(&loc) { static_assert(std::is_same::value, ""); } -template Locale locale_ref::get() const { +template auto locale_ref::get() const -> Locale { static_assert(std::is_same::value, ""); return locale_ ? *static_cast(locale_) : std::locale(); } @@ -104,7 +95,8 @@ FMT_FUNC auto thousands_sep_impl(locale_ref loc) -> thousands_sep_result { auto thousands_sep = grouping.empty() ? Char() : facet.thousands_sep(); return {std::move(grouping), thousands_sep}; } -template FMT_FUNC Char decimal_point_impl(locale_ref loc) { +template +FMT_FUNC auto decimal_point_impl(locale_ref loc) -> Char { return std::use_facet>(loc.get()) .decimal_point(); } @@ -120,7 +112,11 @@ template FMT_FUNC Char decimal_point_impl(locale_ref) { FMT_FUNC auto write_loc(appender out, loc_value value, const format_specs& specs, locale_ref loc) -> bool { -#ifndef FMT_STATIC_THOUSANDS_SEPARATOR +#ifdef FMT_STATIC_THOUSANDS_SEPARATOR + value.visit(loc_writer<>{ + out, specs, std::string(1, FMT_STATIC_THOUSANDS_SEPARATOR), "\3", "."}); + return true; +#else auto locale = loc.get(); // We cannot use the num_put facet because it may produce output in // a wrong encoding. 
@@ -129,10 +125,13 @@ FMT_FUNC auto write_loc(appender out, loc_value value, return std::use_facet(locale).put(out, value, specs); return facet(locale).put(out, value, specs); #endif - return false; } } // namespace detail +FMT_FUNC void report_error(const char* message) { + FMT_THROW(format_error(message)); +} + template typename Locale::id format_facet::id; #ifndef FMT_STATIC_THOUSANDS_SEPARATOR @@ -150,96 +149,41 @@ FMT_API FMT_FUNC auto format_facet::do_put( } #endif -#if !FMT_MSC_VERSION -FMT_API FMT_FUNC format_error::~format_error() noexcept = default; -#endif - -#if !__NVCC__ -FMT_FUNC std::system_error vsystem_error(int error_code, string_view format_str, - format_args args) { +FMT_FUNC auto vsystem_error(int error_code, string_view fmt, format_args args) + -> std::system_error { auto ec = std::error_code(error_code, std::generic_category()); - return std::system_error(ec, vformat(format_str, args)); + return std::system_error(ec, vformat(fmt, args)); } -#endif namespace detail { -template inline bool operator==(basic_fp x, basic_fp y) { +template +inline auto operator==(basic_fp x, basic_fp y) -> bool { return x.f == y.f && x.e == y.e; } // Compilers should be able to optimize this into the ror instruction. -FMT_CONSTEXPR inline uint32_t rotr(uint32_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint32_t n, uint32_t r) noexcept -> uint32_t { r &= 31; return (n >> r) | (n << (32 - r)); } -FMT_CONSTEXPR inline uint64_t rotr(uint64_t n, uint32_t r) noexcept { +FMT_CONSTEXPR inline auto rotr(uint64_t n, uint32_t r) noexcept -> uint64_t { r &= 63; return (n >> r) | (n << (64 - r)); } -// Computes 128-bit result of multiplication of two 64-bit unsigned integers. 
-inline uint128_fallback umul128(uint64_t x, uint64_t y) noexcept { -#if FMT_USE_INT128 - auto p = static_cast(x) * static_cast(y); - return {static_cast(p >> 64), static_cast(p)}; -#elif defined(_MSC_VER) && defined(_M_X64) - auto result = uint128_fallback(); - result.lo_ = _umul128(x, y, &result.hi_); - return result; -#else - const uint64_t mask = static_cast(max_value()); - - uint64_t a = x >> 32; - uint64_t b = x & mask; - uint64_t c = y >> 32; - uint64_t d = y & mask; - - uint64_t ac = a * c; - uint64_t bc = b * c; - uint64_t ad = a * d; - uint64_t bd = b * d; - - uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask); - - return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), - (intermediate << 32) + (bd & mask)}; -#endif -} - // Implementation of Dragonbox algorithm: https://github.com/jk-jeon/dragonbox. namespace dragonbox { -// Computes upper 64 bits of multiplication of two 64-bit unsigned integers. -inline uint64_t umul128_upper64(uint64_t x, uint64_t y) noexcept { -#if FMT_USE_INT128 - auto p = static_cast(x) * static_cast(y); - return static_cast(p >> 64); -#elif defined(_MSC_VER) && defined(_M_X64) - return __umulh(x, y); -#else - return umul128(x, y).high(); -#endif -} - -// Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a -// 128-bit unsigned integer. -inline uint128_fallback umul192_upper128(uint64_t x, - uint128_fallback y) noexcept { - uint128_fallback r = umul128(x, y.high()); - r += umul128_upper64(x, y.low()); - return r; -} - // Computes upper 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_upper64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_upper64(uint32_t x, uint64_t y) noexcept -> uint64_t { return umul128_upper64(static_cast(x) << 32, y); } // Computes lower 128 bits of multiplication of a 64-bit unsigned integer and a // 128-bit unsigned integer. 
-inline uint128_fallback umul192_lower128(uint64_t x, - uint128_fallback y) noexcept { +inline auto umul192_lower128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { uint64_t high = x * y.high(); uint128_fallback high_low = umul128(x, y.low()); return {high + high_low.high(), high_low.low()}; @@ -247,29 +191,17 @@ inline uint128_fallback umul192_lower128(uint64_t x, // Computes lower 64 bits of multiplication of a 32-bit unsigned integer and a // 64-bit unsigned integer. -inline uint64_t umul96_lower64(uint32_t x, uint64_t y) noexcept { +inline auto umul96_lower64(uint32_t x, uint64_t y) noexcept -> uint64_t { return x * y; } -// Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from -// https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1. -inline int floor_log10_pow2(int e) noexcept { - FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent"); - static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); - return (e * 315653) >> 20; -} - // Various fast log computations. -inline int floor_log2_pow10(int e) noexcept { - FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); - return (e * 1741647) >> 19; -} -inline int floor_log10_pow2_minus_log10_4_over_3(int e) noexcept { +inline auto floor_log10_pow2_minus_log10_4_over_3(int e) noexcept -> int { FMT_ASSERT(e <= 2936 && e >= -2985, "too large exponent"); return (e * 631305 - 261663) >> 21; } -static constexpr struct { +FMT_INLINE_VARIABLE constexpr struct { uint32_t divisor; int shift_amount; } div_small_pow10_infos[] = {{10, 16}, {100, 16}}; @@ -278,7 +210,7 @@ static constexpr struct { // divisible by pow(10, N). // Precondition: n <= pow(10, N + 1). template -bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { +auto check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept -> bool { // The numbers below are chosen such that: // 1. floor(n/d) = floor(nm / 2^k) where d=10 or d=100, // 2. 
nm mod 2^k < m if and only if n is divisible by d, @@ -303,7 +235,7 @@ bool check_divisibility_and_divide_by_pow10(uint32_t& n) noexcept { // Computes floor(n / pow(10, N)) for small n and N. // Precondition: n <= pow(10, N + 1). -template uint32_t small_division_by_pow10(uint32_t n) noexcept { +template auto small_division_by_pow10(uint32_t n) noexcept -> uint32_t { constexpr auto info = div_small_pow10_infos[N - 1]; FMT_ASSERT(n <= info.divisor * 10, "n is too large"); constexpr uint32_t magic_number = @@ -312,24 +244,24 @@ template uint32_t small_division_by_pow10(uint32_t n) noexcept { } // Computes floor(n / 10^(kappa + 1)) (float) -inline uint32_t divide_by_10_to_kappa_plus_1(uint32_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint32_t n) noexcept -> uint32_t { // 1374389535 = ceil(2^37/100) return static_cast((static_cast(n) * 1374389535) >> 37); } // Computes floor(n / 10^(kappa + 1)) (double) -inline uint64_t divide_by_10_to_kappa_plus_1(uint64_t n) noexcept { +inline auto divide_by_10_to_kappa_plus_1(uint64_t n) noexcept -> uint64_t { // 2361183241434822607 = ceil(2^(64+7)/1000) return umul128_upper64(n, 2361183241434822607ull) >> 7; } // Various subroutines using pow10 cache -template struct cache_accessor; +template struct cache_accessor; template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint64_t; - static uint64_t get_cached_power(int k) noexcept { + static auto get_cached_power(int k) noexcept -> uint64_t { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); static constexpr const uint64_t pow10_significands[] = { @@ -371,20 +303,23 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul96_upper64(u, cache); return 
{static_cast(r >> 32), static_cast(r) == 0}; } - static uint32_t compute_delta(const cache_entry_type& cache, - int beta) noexcept { + static auto compute_delta(const cache_entry_type& cache, int beta) noexcept + -> uint32_t { return static_cast(cache >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -393,22 +328,22 @@ template <> struct cache_accessor { static_cast(r >> (32 - beta)) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache - (cache >> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return static_cast( (cache + (cache >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta)); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (static_cast( cache >> (64 - num_significand_bits() - 2 - beta)) + 1) / @@ -420,7 +355,7 @@ template <> struct cache_accessor { using carrier_uint = float_info::carrier_uint; using cache_entry_type = uint128_fallback; - static uint128_fallback get_cached_power(int k) noexcept { + static auto 
get_cached_power(int k) noexcept -> uint128_fallback { FMT_ASSERT(k >= float_info::min_k && k <= float_info::max_k, "k is out of range"); @@ -1044,8 +979,22 @@ template <> struct cache_accessor { {0xfcf62c1dee382c42, 0x46729e03dd9ed7b6}, {0x9e19db92b4e31ba9, 0x6c07a2c26a8346d2}, {0xc5a05277621be293, 0xc7098b7305241886}, - { 0xf70867153aa2db38, - 0xb8cbee4fc66d1ea8 } + {0xf70867153aa2db38, 0xb8cbee4fc66d1ea8}, + {0x9a65406d44a5c903, 0x737f74f1dc043329}, + {0xc0fe908895cf3b44, 0x505f522e53053ff3}, + {0xf13e34aabb430a15, 0x647726b9e7c68ff0}, + {0x96c6e0eab509e64d, 0x5eca783430dc19f6}, + {0xbc789925624c5fe0, 0xb67d16413d132073}, + {0xeb96bf6ebadf77d8, 0xe41c5bd18c57e890}, + {0x933e37a534cbaae7, 0x8e91b962f7b6f15a}, + {0xb80dc58e81fe95a1, 0x723627bbb5a4adb1}, + {0xe61136f2227e3b09, 0xcec3b1aaa30dd91d}, + {0x8fcac257558ee4e6, 0x213a4f0aa5e8a7b2}, + {0xb3bd72ed2af29e1f, 0xa988e2cd4f62d19e}, + {0xe0accfa875af45a7, 0x93eb1b80a33b8606}, + {0x8c6c01c9498d8b88, 0xbc72f130660533c4}, + {0xaf87023b9bf0ee6a, 0xeb8fad7c7f8680b5}, + {0xdb68c2ca82ed2a05, 0xa67398db9f6820e2}, #else {0xff77b1fcbebcdc4f, 0x25e8e89c13bb0f7b}, {0xce5d73ff402d98e3, 0xfb0a3d212dc81290}, @@ -1069,8 +1018,8 @@ template <> struct cache_accessor { {0x8da471a9de737e24, 0x5ceaecfed289e5d3}, {0xe4d5e82392a40515, 0x0fabaf3feaa5334b}, {0xb8da1662e7b00a17, 0x3d6a751f3b936244}, - { 0x95527a5202df0ccb, - 0x0f37801e0c43ebc9 } + {0x95527a5202df0ccb, 0x0f37801e0c43ebc9}, + {0xf13e34aabb430a15, 0x647726b9e7c68ff0} #endif }; @@ -1130,19 +1079,22 @@ template <> struct cache_accessor { bool is_integer; }; - static compute_mul_result compute_mul( - carrier_uint u, const cache_entry_type& cache) noexcept { + static auto compute_mul(carrier_uint u, + const cache_entry_type& cache) noexcept + -> compute_mul_result { auto r = umul192_upper128(u, cache); return {r.high(), r.low() == 0}; } - static uint32_t compute_delta(cache_entry_type const& cache, - int beta) noexcept { + static auto compute_delta(cache_entry_type const& cache, 
int beta) noexcept + -> uint32_t { return static_cast(cache.high() >> (64 - 1 - beta)); } - static compute_mul_parity_result compute_mul_parity( - carrier_uint two_f, const cache_entry_type& cache, int beta) noexcept { + static auto compute_mul_parity(carrier_uint two_f, + const cache_entry_type& cache, + int beta) noexcept + -> compute_mul_parity_result { FMT_ASSERT(beta >= 1, ""); FMT_ASSERT(beta < 64, ""); @@ -1151,31 +1103,35 @@ template <> struct cache_accessor { ((r.high() << beta) | (r.low() >> (64 - beta))) == 0}; } - static carrier_uint compute_left_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_left_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() - (cache.high() >> (num_significand_bits() + 2))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_right_endpoint_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_right_endpoint_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return (cache.high() + (cache.high() >> (num_significand_bits() + 1))) >> (64 - num_significand_bits() - 1 - beta); } - static carrier_uint compute_round_up_for_shorter_interval_case( - const cache_entry_type& cache, int beta) noexcept { + static auto compute_round_up_for_shorter_interval_case( + const cache_entry_type& cache, int beta) noexcept -> carrier_uint { return ((cache.high() >> (64 - num_significand_bits() - 2 - beta)) + 1) / 2; } }; +FMT_FUNC auto get_cached_power(int k) noexcept -> uint128_fallback { + return cache_accessor::get_cached_power(k); +} + // Various integer checks -template -bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { +template +auto is_left_endpoint_integer_shorter_interval(int exponent) noexcept -> bool { const int case_shorter_interval_left_endpoint_lower_threshold = 
2; const int case_shorter_interval_left_endpoint_upper_threshold = 3; return exponent >= case_shorter_interval_left_endpoint_lower_threshold && @@ -1183,12 +1139,12 @@ bool is_left_endpoint_integer_shorter_interval(int exponent) noexcept { } // Remove trailing zeros from n and return the number of zeros removed (float) -FMT_INLINE int remove_trailing_zeros(uint32_t& n) noexcept { +FMT_INLINE int remove_trailing_zeros(uint32_t& n, int s = 0) noexcept { FMT_ASSERT(n != 0, ""); - const uint32_t mod_inv_5 = 0xcccccccd; - const uint32_t mod_inv_25 = mod_inv_5 * mod_inv_5; + // Modular inverse of 5 (mod 2^32): (mod_inv_5 * 5) mod 2^32 = 1. + constexpr uint32_t mod_inv_5 = 0xcccccccd; + constexpr uint32_t mod_inv_25 = 0xc28f5c29; // = mod_inv_5 * mod_inv_5 - int s = 0; while (true) { auto q = rotr(n * mod_inv_25, 2); if (q > max_value() / 100) break; @@ -1200,7 +1156,6 @@ FMT_INLINE int remove_trailing_zeros(uint32_t& n) noexcept { n = q; s |= 1; } - return s; } @@ -1214,32 +1169,17 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept { // Is n is divisible by 10^8? if ((nm.high() & ((1ull << (90 - 64)) - 1)) == 0 && nm.low() < magic_number) { - // If yes, work with the quotient. + // If yes, work with the quotient... auto n32 = static_cast(nm.high() >> (90 - 64)); - - const uint32_t mod_inv_5 = 0xcccccccd; - const uint32_t mod_inv_25 = mod_inv_5 * mod_inv_5; - - int s = 8; - while (true) { - auto q = rotr(n32 * mod_inv_25, 2); - if (q > max_value() / 100) break; - n32 = q; - s += 2; - } - auto q = rotr(n32 * mod_inv_5, 1); - if (q <= max_value() / 10) { - n32 = q; - s |= 1; - } - + // ... and use the 32 bit variant of the function + int s = remove_trailing_zeros(n32, 8); n = n32; return s; } // If n is not divisible by 10^8, work with n itself. 
- const uint64_t mod_inv_5 = 0xcccccccccccccccd; - const uint64_t mod_inv_25 = mod_inv_5 * mod_inv_5; + constexpr uint64_t mod_inv_5 = 0xcccccccccccccccd; + constexpr uint64_t mod_inv_25 = 0x8f5c28f5c28f5c29; // mod_inv_5 * mod_inv_5 int s = 0; while (true) { @@ -1258,7 +1198,7 @@ FMT_INLINE int remove_trailing_zeros(uint64_t& n) noexcept { } // The main algorithm for shorter interval case -template +template FMT_INLINE decimal_fp shorter_interval_case(int exponent) noexcept { decimal_fp ret_value; // Compute k and beta @@ -1305,7 +1245,7 @@ FMT_INLINE decimal_fp shorter_interval_case(int exponent) noexcept { return ret_value; } -template decimal_fp to_decimal(T x) noexcept { +template auto to_decimal(T x) noexcept -> decimal_fp { // Step 1: integer promotion & Schubfach multiplier calculation. using carrier_uint = typename float_info::carrier_uint; @@ -1429,17 +1369,6 @@ template decimal_fp to_decimal(T x) noexcept { return ret_value; } } // namespace dragonbox - -#ifdef _MSC_VER -FMT_FUNC auto fmt_snprintf(char* buf, size_t size, const char* fmt, ...) 
- -> int { - auto args = va_list(); - va_start(args, fmt); - int result = vsnprintf_s(buf, size, _TRUNCATE, fmt, args); - va_end(args); - return result; -} -#endif } // namespace detail template <> struct formatter { @@ -1455,15 +1384,15 @@ template <> struct formatter { for (auto i = n.bigits_.size(); i > 0; --i) { auto value = n.bigits_[i - 1u]; if (first) { - out = format_to(out, FMT_STRING("{:x}"), value); + out = fmt::format_to(out, FMT_STRING("{:x}"), value); first = false; continue; } - out = format_to(out, FMT_STRING("{:08x}"), value); + out = fmt::format_to(out, FMT_STRING("{:08x}"), value); } if (n.exp_ > 0) - out = format_to(out, FMT_STRING("p{}"), - n.exp_ * detail::bigint::bigit_bits); + out = fmt::format_to(out, FMT_STRING("p{}"), + n.exp_ * detail::bigint::bigit_bits); return out; } }; @@ -1485,14 +1414,12 @@ FMT_FUNC detail::utf8_to_utf16::utf8_to_utf16(string_view s) { FMT_FUNC void format_system_error(detail::buffer& out, int error_code, const char* message) noexcept { -#if !__NVCC__ FMT_TRY { auto ec = std::error_code(error_code, std::generic_category()); - write(std::back_inserter(out), std::system_error(ec, message).what()); + detail::write(appender(out), std::system_error(ec, message).what()); return; } FMT_CATCH(...) {} -#endif format_error_code(out, error_code, message); } @@ -1501,7 +1428,7 @@ FMT_FUNC void report_system_error(int error_code, report_error(format_system_error, error_code, message); } -FMT_FUNC std::string vformat(string_view fmt, format_args args) { +FMT_FUNC auto vformat(string_view fmt, format_args args) -> std::string { // Don't optimize the "{}" case to keep the binary size small and because it // can be better optimized in fmt::format anyway. 
auto buffer = memory_buffer(); @@ -1510,57 +1437,299 @@ FMT_FUNC std::string vformat(string_view fmt, format_args args) { } namespace detail { -#ifdef _WIN32 + +template struct span { + T* data; + size_t size; +}; + +template auto flockfile(F* f) -> decltype(_lock_file(f)) { + _lock_file(f); +} +template auto funlockfile(F* f) -> decltype(_unlock_file(f)) { + _unlock_file(f); +} + +#ifndef getc_unlocked +template auto getc_unlocked(F* f) -> decltype(_fgetc_nolock(f)) { + return _fgetc_nolock(f); +} +#endif + +template +struct has_flockfile : std::false_type {}; + +template +struct has_flockfile()))>> + : std::true_type {}; + +// A FILE wrapper. F is FILE defined as a template parameter to make system API +// detection work. +template class file_base { + public: + F* file_; + + public: + file_base(F* file) : file_(file) {} + operator F*() const { return file_; } + + // Reads a code unit from the stream. + auto get() -> int { + int result = getc_unlocked(file_); + if (result == EOF && ferror(file_) != 0) + FMT_THROW(system_error(errno, FMT_STRING("getc failed"))); + return result; + } + + // Puts the code unit back into the stream buffer. + void unget(char c) { + if (ungetc(c, file_) == EOF) + FMT_THROW(system_error(errno, FMT_STRING("ungetc failed"))); + } + + void flush() { fflush(this->file_); } +}; + +// A FILE wrapper for glibc. +template class glibc_file : public file_base { + private: + enum { + line_buffered = 0x200, // _IO_LINE_BUF + unbuffered = 2 // _IO_UNBUFFERED + }; + + public: + using file_base::file_base; + + auto is_buffered() const -> bool { + return (this->file_->_flags & unbuffered) == 0; + } + + void init_buffer() { + if (this->file_->_IO_write_ptr) return; + // Force buffer initialization by placing and removing a char in a buffer. + putc_unlocked(0, this->file_); + --this->file_->_IO_write_ptr; + } + + // Returns the file's read buffer. 
+ auto get_read_buffer() const -> span { + auto ptr = this->file_->_IO_read_ptr; + return {ptr, to_unsigned(this->file_->_IO_read_end - ptr)}; + } + + // Returns the file's write buffer. + auto get_write_buffer() const -> span { + auto ptr = this->file_->_IO_write_ptr; + return {ptr, to_unsigned(this->file_->_IO_buf_end - ptr)}; + } + + void advance_write_buffer(size_t size) { this->file_->_IO_write_ptr += size; } + + bool needs_flush() const { + if ((this->file_->_flags & line_buffered) == 0) return false; + char* end = this->file_->_IO_write_end; + return memchr(end, '\n', to_unsigned(this->file_->_IO_write_ptr - end)); + } + + void flush() { fflush_unlocked(this->file_); } +}; + +// A FILE wrapper for Apple's libc. +template class apple_file : public file_base { + private: + enum { + line_buffered = 1, // __SNBF + unbuffered = 2 // __SLBF + }; + + public: + using file_base::file_base; + + auto is_buffered() const -> bool { + return (this->file_->_flags & unbuffered) == 0; + } + + void init_buffer() { + if (this->file_->_p) return; + // Force buffer initialization by placing and removing a char in a buffer. + putc_unlocked(0, this->file_); + --this->file_->_p; + ++this->file_->_w; + } + + auto get_read_buffer() const -> span { + return {reinterpret_cast(this->file_->_p), + to_unsigned(this->file_->_r)}; + } + + auto get_write_buffer() const -> span { + return {reinterpret_cast(this->file_->_p), + to_unsigned(this->file_->_bf._base + this->file_->_bf._size - + this->file_->_p)}; + } + + void advance_write_buffer(size_t size) { + this->file_->_p += size; + this->file_->_w -= size; + } + + bool needs_flush() const { + if ((this->file_->_flags & line_buffered) == 0) return false; + return memchr(this->file_->_p + this->file_->_w, '\n', + to_unsigned(-this->file_->_w)); + } +}; + +// A fallback FILE wrapper. +template class fallback_file : public file_base { + private: + char next_; // The next unconsumed character in the buffer. 
+ bool has_next_ = false; + + public: + using file_base::file_base; + + auto is_buffered() const -> bool { return false; } + auto needs_flush() const -> bool { return false; } + void init_buffer() {} + + auto get_read_buffer() const -> span { + return {&next_, has_next_ ? 1u : 0u}; + } + + auto get_write_buffer() const -> span { return {nullptr, 0}; } + + void advance_write_buffer(size_t) {} + + auto get() -> int { + has_next_ = false; + return file_base::get(); + } + + void unget(char c) { + file_base::unget(c); + next_ = c; + has_next_ = true; + } +}; + +#ifndef FMT_USE_FALLBACK_FILE +# define FMT_USE_FALLBACK_FILE 1 +#endif + +template +auto get_file(F* f, int) -> apple_file { + return f; +} +template +inline auto get_file(F* f, int) -> glibc_file { + return f; +} + +inline auto get_file(FILE* f, ...) -> fallback_file { return f; } + +using file_ref = decltype(get_file(static_cast(nullptr), 0)); + +template +class file_print_buffer : public buffer { + public: + explicit file_print_buffer(F*) : buffer(nullptr, size_t()) {} +}; + +template +class file_print_buffer::value>> + : public buffer { + private: + file_ref file_; + + static void grow(buffer& base, size_t) { + auto& self = static_cast(base); + self.file_.advance_write_buffer(self.size()); + if (self.file_.get_write_buffer().size == 0) self.file_.flush(); + auto buf = self.file_.get_write_buffer(); + FMT_ASSERT(buf.size > 0, ""); + self.set(buf.data, buf.size); + self.clear(); + } + + public: + explicit file_print_buffer(F* f) : buffer(grow, size_t()), file_(f) { + flockfile(f); + file_.init_buffer(); + auto buf = file_.get_write_buffer(); + set(buf.data, buf.size); + } + ~file_print_buffer() { + file_.advance_write_buffer(size()); + bool flush = file_.needs_flush(); + F* f = file_; // Make funlockfile depend on the template parameter F + funlockfile(f); // for the system API detection to work. 
+ if (flush) fflush(file_); + } +}; + +#if !defined(_WIN32) || defined(FMT_USE_WRITE_CONSOLE) +FMT_FUNC auto write_console(int, string_view) -> bool { return false; } +#else using dword = conditional_t; extern "C" __declspec(dllimport) int __stdcall WriteConsoleW( // void*, const void*, dword, dword*, void*); -FMT_FUNC bool write_console(std::FILE* f, string_view text) { - auto fd = _fileno(f); - if (_isatty(fd)) { - detail::utf8_to_utf16 u16(string_view(text.data(), text.size())); - auto written = detail::dword(); - if (detail::WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), - u16.c_str(), static_cast(u16.size()), - &written, nullptr)) { - return true; - } - } - // We return false if the file descriptor was not TTY, or it was but - // SetConsoleW failed which can happen if the output has been redirected to - // NUL. In both cases when we return false, we should attempt to do regular - // write via fwrite or std::ostream::write. - return false; +FMT_FUNC bool write_console(int fd, string_view text) { + auto u16 = utf8_to_utf16(text); + return WriteConsoleW(reinterpret_cast(_get_osfhandle(fd)), u16.c_str(), + static_cast(u16.size()), nullptr, nullptr) != 0; } #endif -FMT_FUNC void print(std::FILE* f, string_view text) { #ifdef _WIN32 - if (write_console(f, text)) return; +// Print assuming legacy (non-Unicode) encoding. 
+FMT_FUNC void vprint_mojibake(std::FILE* f, string_view fmt, format_args args, + bool newline) { + auto buffer = memory_buffer(); + detail::vformat_to(buffer, fmt, args); + if (newline) buffer.push_back('\n'); + fwrite_fully(buffer.data(), buffer.size(), f); +} #endif - detail::fwrite_fully(text.data(), 1, text.size(), f); + +FMT_FUNC void print(std::FILE* f, string_view text) { +#if defined(_WIN32) && !defined(FMT_USE_WRITE_CONSOLE) + int fd = _fileno(f); + if (_isatty(fd)) { + std::fflush(f); + if (write_console(fd, text)) return; + } +#endif + fwrite_fully(text.data(), text.size(), f); } } // namespace detail -FMT_FUNC void vprint(std::FILE* f, string_view format_str, format_args args) { - memory_buffer buffer; - detail::vformat_to(buffer, format_str, args); +FMT_FUNC void vprint_buffered(std::FILE* f, string_view fmt, format_args args) { + auto buffer = memory_buffer(); + detail::vformat_to(buffer, fmt, args); detail::print(f, {buffer.data(), buffer.size()}); } -#ifdef _WIN32 -// Print assuming legacy (non-Unicode) encoding. 
-FMT_FUNC void detail::vprint_mojibake(std::FILE* f, string_view format_str, - format_args args) { - memory_buffer buffer; - detail::vformat_to(buffer, format_str, - basic_format_args>(args)); - fwrite_fully(buffer.data(), 1, buffer.size(), f); +FMT_FUNC void vprint(std::FILE* f, string_view fmt, format_args args) { + if (!detail::file_ref(f).is_buffered() || !detail::has_flockfile<>()) + return vprint_buffered(f, fmt, args); + auto&& buffer = detail::file_print_buffer<>(f); + return detail::vformat_to(buffer, fmt, args); +} + +FMT_FUNC void vprintln(std::FILE* f, string_view fmt, format_args args) { + auto buffer = memory_buffer(); + detail::vformat_to(buffer, fmt, args); + buffer.push_back('\n'); + detail::print(f, {buffer.data(), buffer.size()}); } -#endif -FMT_FUNC void vprint(string_view format_str, format_args args) { - vprint(stdout, format_str, args); +FMT_FUNC void vprint(string_view fmt, format_args args) { + vprint(stdout, fmt, args); } namespace detail { diff --git a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format.h b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format.h index 8dda88727d5c..67f0ab739b0d 100644 --- a/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format.h +++ b/packages/seacas/libraries/ioss/src/private_copy_fmt/fmt/format.h @@ -33,31 +33,65 @@ #ifndef FMT_FORMAT_H_ #define FMT_FORMAT_H_ -#include // std::signbit -#include // uint32_t -#include // std::memcpy -#include // std::initializer_list -#include // std::numeric_limits -#include // std::uninitialized_copy -#include // std::runtime_error -#include // std::system_error - -#ifdef __cpp_lib_bit_cast -# include // std::bitcast +#ifndef _LIBCPP_REMOVE_TRANSITIVE_INCLUDES +# define _LIBCPP_REMOVE_TRANSITIVE_INCLUDES +# define FMT_REMOVE_TRANSITIVE_INCLUDES #endif -#include "core.h" +#include "base.h" + +#ifndef FMT_MODULE +# include // std::signbit +# include // uint32_t +# include // std::memcpy +# include // std::initializer_list +# include // 
std::numeric_limits +# if defined(__GLIBCXX__) && !defined(_GLIBCXX_USE_DUAL_ABI) +// Workaround for pre gcc 5 libstdc++. +# include // std::allocator_traits +# endif +# include // std::runtime_error +# include // std::string +# include // std::system_error + +// Checking FMT_CPLUSPLUS for warning suppression in MSVC. +# if FMT_HAS_INCLUDE() && FMT_CPLUSPLUS > 201703L +# include // std::bit_cast +# endif + +// libc++ supports string_view in pre-c++17. +# if FMT_HAS_INCLUDE() && \ + (FMT_CPLUSPLUS >= 201703L || defined(_LIBCPP_VERSION)) +# include +# define FMT_USE_STRING_VIEW +# endif +#endif // FMT_MODULE -#if FMT_GCC_VERSION -# define FMT_GCC_VISIBILITY_HIDDEN __attribute__((visibility("hidden"))) +#if defined __cpp_inline_variables && __cpp_inline_variables >= 201606L +# define FMT_INLINE_VARIABLE inline #else -# define FMT_GCC_VISIBILITY_HIDDEN +# define FMT_INLINE_VARIABLE +#endif + +#ifndef FMT_NO_UNIQUE_ADDRESS +# if FMT_CPLUSPLUS >= 202002L +# if FMT_HAS_CPP_ATTRIBUTE(no_unique_address) +# define FMT_NO_UNIQUE_ADDRESS [[no_unique_address]] +// VS2019 v16.10 and later except clang-cl (https://reviews.llvm.org/D110485). +# elif (FMT_MSC_VERSION >= 1929) && !FMT_CLANG_VERSION +# define FMT_NO_UNIQUE_ADDRESS [[msvc::no_unique_address]] +# endif +# endif +#endif +#ifndef FMT_NO_UNIQUE_ADDRESS +# define FMT_NO_UNIQUE_ADDRESS #endif -#ifdef __NVCC__ -# define FMT_CUDA_VERSION (__CUDACC_VER_MAJOR__ * 100 + __CUDACC_VER_MINOR__) +// Visibility when compiled as a shared library/object. 
+#if defined(FMT_LIB_EXPORT) || defined(FMT_SHARED) +# define FMT_SO_VISIBILITY(value) FMT_VISIBILITY(value) #else -# define FMT_CUDA_VERSION 0 +# define FMT_SO_VISIBILITY(value) #endif #ifdef __has_builtin @@ -72,11 +106,12 @@ # define FMT_NOINLINE #endif -#if FMT_MSC_VERSION -# define FMT_MSC_DEFAULT = default -#else -# define FMT_MSC_DEFAULT -#endif +namespace std { +template <> struct iterator_traits { + using iterator_category = output_iterator_tag; + using value_type = char; +}; +} // namespace std #ifndef FMT_THROW # if FMT_EXCEPTIONS @@ -96,21 +131,11 @@ FMT_END_NAMESPACE # define FMT_THROW(x) throw x # endif # else -# define FMT_THROW(x) \ - do { \ - FMT_ASSERT(false, (x).what()); \ - } while (false) +# define FMT_THROW(x) \ + ::fmt::detail::assert_fail(__FILE__, __LINE__, (x).what()) # endif #endif -#if FMT_EXCEPTIONS -# define FMT_TRY try -# define FMT_CATCH(x) catch (x) -#else -# define FMT_TRY if (true) -# define FMT_CATCH(x) if (false) -#endif - #ifndef FMT_MAYBE_UNUSED # if FMT_HAS_CPP17_ATTRIBUTE(maybe_unused) # define FMT_MAYBE_UNUSED [[maybe_unused]] @@ -121,7 +146,10 @@ FMT_END_NAMESPACE #ifndef FMT_USE_USER_DEFINED_LITERALS // EDG based compilers (Intel, NVIDIA, Elbrus, etc), GCC and MSVC support UDLs. -# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 407 || \ +// +// GCC before 4.9 requires a space in `operator"" _a` which is invalid in later +// compiler versions. +# if (FMT_HAS_FEATURE(cxx_user_literals) || FMT_GCC_VERSION >= 409 || \ FMT_MSC_VERSION >= 1900) && \ (!defined(__EDG_VERSION__) || __EDG_VERSION__ >= /* UDL feature */ 480) # define FMT_USE_USER_DEFINED_LITERALS 1 @@ -201,7 +229,8 @@ inline auto clzll(uint64_t x) -> int { _BitScanReverse64(&r, x); # else // Scan the high 32 bits. - if (_BitScanReverse(&r, static_cast(x >> 32))) return 63 ^ (r + 32); + if (_BitScanReverse(&r, static_cast(x >> 32))) + return 63 ^ static_cast(r + 32); // Scan the low 32 bits. 
_BitScanReverse(&r, static_cast(x)); # endif @@ -241,6 +270,11 @@ FMT_END_NAMESPACE #endif FMT_BEGIN_NAMESPACE + +template +struct is_contiguous> + : std::true_type {}; + namespace detail { FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { @@ -250,49 +284,12 @@ FMT_CONSTEXPR inline void abort_fuzzing_if(bool condition) { #endif } -template struct string_literal { - static constexpr CharT value[sizeof...(C)] = {C...}; - constexpr operator basic_string_view() const { - return {value, sizeof...(C)}; - } -}; - -#if FMT_CPLUSPLUS < 201703L -template -constexpr CharT string_literal::value[sizeof...(C)]; +#if defined(FMT_USE_STRING_VIEW) +template using std_string_view = std::basic_string_view; +#else +template struct std_string_view {}; #endif -template class formatbuf : public Streambuf { - private: - using char_type = typename Streambuf::char_type; - using streamsize = decltype(std::declval().sputn(nullptr, 0)); - using int_type = typename Streambuf::int_type; - using traits_type = typename Streambuf::traits_type; - - buffer& buffer_; - - public: - explicit formatbuf(buffer& buf) : buffer_(buf) {} - - protected: - // The put area is always empty. This makes the implementation simpler and has - // the advantage that the streambuf and the buffer are always in sync and - // sputc never writes into uninitialized memory. A disadvantage is that each - // call to sputc always results in a (virtual) call to overflow. There is no - // disadvantage here for sputn since this always results in a call to xsputn. - - auto overflow(int_type ch) -> int_type override { - if (!traits_type::eq_int_type(ch, traits_type::eof())) - buffer_.push_back(static_cast(ch)); - return ch; - } - - auto xsputn(const char_type* s, streamsize count) -> streamsize override { - buffer_.append(s, s + count); - return count; - } -}; - // Implementation of std::bit_cast for pre-C++20. 
template FMT_CONSTEXPR20 auto bit_cast(const From& from) -> To { @@ -324,14 +321,12 @@ class uint128_fallback { private: uint64_t lo_, hi_; - friend uint128_fallback umul128(uint64_t x, uint64_t y) noexcept; - public: constexpr uint128_fallback(uint64_t hi, uint64_t lo) : lo_(lo), hi_(hi) {} constexpr uint128_fallback(uint64_t value = 0) : lo_(value), hi_(0) {} - constexpr uint64_t high() const noexcept { return hi_; } - constexpr uint64_t low() const noexcept { return lo_; } + constexpr auto high() const noexcept -> uint64_t { return hi_; } + constexpr auto low() const noexcept -> uint64_t { return lo_; } template ::value)> constexpr explicit operator T() const { @@ -360,6 +355,10 @@ class uint128_fallback { -> uint128_fallback { return {lhs.hi_ & rhs.hi_, lhs.lo_ & rhs.lo_}; } + friend constexpr auto operator~(const uint128_fallback& n) + -> uint128_fallback { + return {~n.hi_, ~n.lo_}; + } friend auto operator+(const uint128_fallback& lhs, const uint128_fallback& rhs) -> uint128_fallback { auto result = uint128_fallback(lhs); @@ -398,8 +397,12 @@ class uint128_fallback { lo_ = new_lo; hi_ = new_hi; } + FMT_CONSTEXPR void operator&=(uint128_fallback n) { + lo_ &= n.lo_; + hi_ &= n.hi_; + } - FMT_CONSTEXPR20 uint128_fallback& operator+=(uint64_t n) noexcept { + FMT_CONSTEXPR20 auto operator+=(uint64_t n) noexcept -> uint128_fallback& { if (is_constant_evaluated()) { lo_ += n; hi_ += (lo_ < n ? 1 : 0); @@ -443,7 +446,8 @@ template constexpr auto num_bits() -> int { } // std::numeric_limits::digits may return 0 for 128-bit ints. template <> constexpr auto num_bits() -> int { return 128; } -template <> constexpr auto num_bits() -> int { return 128; } +template <> constexpr auto num_bits() -> int { return 128; } +template <> constexpr auto num_bits() -> int { return 128; } // A heterogeneous bit_cast used for converting 96-bit long double to uint128_t // and 128-bit pointers to uint128_fallback. 
@@ -464,10 +468,34 @@ inline auto bit_cast(const From& from) -> To { return result; } +template +FMT_CONSTEXPR20 inline auto countl_zero_fallback(UInt n) -> int { + int lz = 0; + constexpr UInt msb_mask = static_cast(1) << (num_bits() - 1); + for (; (n & msb_mask) == 0; n <<= 1) lz++; + return lz; +} + +FMT_CONSTEXPR20 inline auto countl_zero(uint32_t n) -> int { +#ifdef FMT_BUILTIN_CLZ + if (!is_constant_evaluated()) return FMT_BUILTIN_CLZ(n); +#endif + return countl_zero_fallback(n); +} + +FMT_CONSTEXPR20 inline auto countl_zero(uint64_t n) -> int { +#ifdef FMT_BUILTIN_CLZLL + if (!is_constant_evaluated()) return FMT_BUILTIN_CLZLL(n); +#endif + return countl_zero_fallback(n); +} + FMT_INLINE void assume(bool condition) { (void)condition; #if FMT_HAS_BUILTIN(__builtin_assume) && !FMT_ICC_VERSION __builtin_assume(condition); +#elif FMT_GCC_VERSION + if (!condition) __builtin_unreachable(); #endif } @@ -486,37 +514,24 @@ inline auto get_data(Container& c) -> typename Container::value_type* { return c.data(); } -#if defined(_SECURE_SCL) && _SECURE_SCL -// Make a checked iterator to avoid MSVC warnings. -template using checked_ptr = stdext::checked_array_iterator; -template -constexpr auto make_checked(T* p, size_t size) -> checked_ptr { - return {p, size}; -} -#else -template using checked_ptr = T*; -template constexpr auto make_checked(T* p, size_t) -> T* { - return p; -} -#endif - // Attempts to reserve space for n extra characters in the output range. // Returns a pointer to the reserved range or a reference to it. 
-template ::value)> +template ::value&& + is_contiguous::value)> #if FMT_CLANG_VERSION >= 307 && !FMT_ICC_VERSION __attribute__((no_sanitize("undefined"))) #endif inline auto -reserve(std::back_insert_iterator it, size_t n) - -> checked_ptr { - Container& c = get_container(it); +reserve(OutputIt it, size_t n) -> typename OutputIt::value_type* { + auto& c = get_container(it); size_t size = c.size(); c.resize(size + n); - return make_checked(get_data(c) + size, n); + return get_data(c) + size; } template -inline auto reserve(buffer_appender it, size_t n) -> buffer_appender { +inline auto reserve(basic_appender it, size_t n) -> basic_appender { buffer& buf = get_container(it); buf.try_reserve(buf.size() + n); return it; @@ -535,18 +550,21 @@ template constexpr auto to_pointer(OutputIt, size_t) -> T* { return nullptr; } -template auto to_pointer(buffer_appender it, size_t n) -> T* { +template auto to_pointer(basic_appender it, size_t n) -> T* { buffer& buf = get_container(it); auto size = buf.size(); + buf.try_reserve(size + n); if (buf.capacity() < size + n) return nullptr; buf.try_resize(size + n); return buf.data() + size; } -template ::value)> -inline auto base_iterator(std::back_insert_iterator& it, - checked_ptr) - -> std::back_insert_iterator { +template ::value&& + is_contiguous::value)> +inline auto base_iterator(OutputIt it, + typename OutputIt::container_type::value_type*) + -> OutputIt { return it; } @@ -572,16 +590,10 @@ FMT_CONSTEXPR20 auto fill_n(T* out, Size count, char value) -> T* { return out + count; } -#ifdef __cpp_char8_t -using char8_type = char8_t; -#else -enum char8_type : unsigned char {}; -#endif - template -FMT_CONSTEXPR FMT_NOINLINE auto copy_str_noinline(InputIt begin, InputIt end, - OutputIt out) -> OutputIt { - return copy_str(begin, end, out); +FMT_CONSTEXPR FMT_NOINLINE auto copy_noinline(InputIt begin, InputIt end, + OutputIt out) -> OutputIt { + return copy(begin, end, out); } // A public domain branchless UTF-8 decoder by 
Christopher Wellons: @@ -608,7 +620,8 @@ FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e) constexpr const int shiftc[] = {0, 18, 12, 6, 0}; constexpr const int shifte[] = {0, 6, 4, 2, 0}; - int len = code_point_length_impl(*s); + int len = "\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\1\0\0\0\0\0\0\0\0\2\2\2\2\3\3\4" + [static_cast(*s) >> 3]; // Compute the pointer to the next character early so that the next // iteration can start working on the next character. Neither Clang // nor GCC figure out this reordering on their own. @@ -637,7 +650,7 @@ FMT_CONSTEXPR inline auto utf8_decode(const char* s, uint32_t* c, int* e) return next; } -constexpr uint32_t invalid_code_point = ~uint32_t(); +constexpr FMT_INLINE_VARIABLE uint32_t invalid_code_point = ~uint32_t(); // Invokes f(cp, sv) for every code point cp in s with sv being the string view // corresponding to the code point. cp is invalid_code_point on error. @@ -661,7 +674,7 @@ FMT_CONSTEXPR void for_each_codepoint(string_view s, F f) { } if (auto num_chars_left = s.data() + s.size() - p) { char buf[2 * block_size - 1] = {}; - copy_str(p, p + num_chars_left, buf); + copy(p, p + num_chars_left, buf); const char* buf_ptr = buf; do { auto end = decode(buf_ptr, p); @@ -678,7 +691,7 @@ inline auto compute_width(basic_string_view s) -> size_t { } // Computes approximate display width of a UTF-8 string. -FMT_CONSTEXPR inline size_t compute_width(string_view s) { +FMT_CONSTEXPR inline auto compute_width(string_view s) -> size_t { size_t num_code_points = 0; // It is not a lambda for compatibility with C++14. 
struct count_code_points { @@ -712,11 +725,6 @@ FMT_CONSTEXPR inline size_t compute_width(string_view s) { return num_code_points; } -inline auto compute_width(basic_string_view s) -> size_t { - return compute_width( - string_view(reinterpret_cast(s.data()), s.size())); -} - template inline auto code_point_index(basic_string_view s, size_t n) -> size_t { size_t size = s.size(); @@ -725,18 +733,17 @@ inline auto code_point_index(basic_string_view s, size_t n) -> size_t { // Calculates the index of the nth code point in a UTF-8 string. inline auto code_point_index(string_view s, size_t n) -> size_t { - const char* data = s.data(); - size_t num_code_points = 0; - for (size_t i = 0, size = s.size(); i != size; ++i) { - if ((data[i] & 0xc0) != 0x80 && ++num_code_points > n) return i; - } - return s.size(); -} - -inline auto code_point_index(basic_string_view s, size_t n) - -> size_t { - return code_point_index( - string_view(reinterpret_cast(s.data()), s.size()), n); + size_t result = s.size(); + const char* begin = s.begin(); + for_each_codepoint(s, [begin, &n, &result](uint32_t, string_view sv) { + if (n != 0) { + --n; + return true; + } + result = to_unsigned(sv.begin() - begin); + return false; + }); + return result; } template struct is_integral : std::is_integral {}; @@ -754,18 +761,32 @@ using is_integer = !std::is_same::value && !std::is_same::value>; -#ifndef FMT_USE_FLOAT128 -# ifdef __SIZEOF_FLOAT128__ -# define FMT_USE_FLOAT128 1 -# else -# define FMT_USE_FLOAT128 0 -# endif +#ifndef FMT_USE_FLOAT +# define FMT_USE_FLOAT 1 +#endif +#ifndef FMT_USE_DOUBLE +# define FMT_USE_DOUBLE 1 +#endif +#ifndef FMT_USE_LONG_DOUBLE +# define FMT_USE_LONG_DOUBLE 1 +#endif + +#if defined(FMT_USE_FLOAT128) +// Use the provided definition. 
+#elif FMT_CLANG_VERSION && FMT_HAS_INCLUDE() +# define FMT_USE_FLOAT128 1 +#elif FMT_GCC_VERSION && defined(_GLIBCXX_USE_FLOAT128) && \ + !defined(__STRICT_ANSI__) +# define FMT_USE_FLOAT128 1 +#else +# define FMT_USE_FLOAT128 0 #endif #if FMT_USE_FLOAT128 using float128 = __float128; #else using float128 = void; #endif + template using is_float128 = std::is_same; template @@ -784,61 +805,39 @@ using is_double_double = bool_constant::digits == 106>; # define FMT_USE_FULL_CACHE_DRAGONBOX 0 #endif -template -template -void buffer::append(const U* begin, const U* end) { - while (begin != end) { - auto count = to_unsigned(end - begin); - try_reserve(size_ + count); - auto free_cap = capacity_ - size_; - if (free_cap < count) count = free_cap; - std::uninitialized_copy_n(begin, count, make_checked(ptr_ + size_, count)); - size_ += count; - begin += count; - } -} - template struct is_locale : std::false_type {}; template struct is_locale> : std::true_type {}; } // namespace detail -FMT_MODULE_EXPORT_BEGIN +FMT_BEGIN_EXPORT // The number of characters to store in the basic_memory_buffer object itself // to avoid dynamic memory allocation. enum { inline_buffer_size = 500 }; /** - \rst - A dynamically growing memory buffer for trivially copyable/constructible types - with the first ``SIZE`` elements stored in the object itself. - - You can use the ``memory_buffer`` type alias for ``char`` instead. - - **Example**:: - - auto out = fmt::memory_buffer(); - format_to(std::back_inserter(out), "The answer is {}.", 42); - - This will append the following output to the ``out`` object: - - .. code-block:: none - - The answer is 42. - - The output can be converted to an ``std::string`` with ``to_string(out)``. - \endrst + * A dynamically growing memory buffer for trivially copyable/constructible + * types with the first `SIZE` elements stored in the object itself. Most + * commonly used via the `memory_buffer` alias for `char`. 
+ * + * **Example**: + * + * auto out = fmt::memory_buffer(); + * fmt::format_to(std::back_inserter(out), "The answer is {}.", 42); + * + * This will append "The answer is 42." to `out`. The buffer content can be + * converted to `std::string` with `to_string(out)`. */ template > -class basic_memory_buffer final : public detail::buffer { +class basic_memory_buffer : public detail::buffer { private: T store_[SIZE]; - // Don't inherit from Allocator avoid generating type_info for it. - Allocator alloc_; + // Don't inherit from Allocator to avoid generating type_info for it. + FMT_NO_UNIQUE_ADDRESS Allocator alloc_; // Deallocate memory allocated by the buffer. FMT_CONSTEXPR20 void deallocate() { @@ -846,8 +845,29 @@ class basic_memory_buffer final : public detail::buffer { if (data != store_) alloc_.deallocate(data, this->capacity()); } - protected: - FMT_CONSTEXPR20 void grow(size_t size) override; + static FMT_CONSTEXPR20 void grow(detail::buffer& buf, size_t size) { + detail::abort_fuzzing_if(size > 5000); + auto& self = static_cast(buf); + const size_t max_size = + std::allocator_traits::max_size(self.alloc_); + size_t old_capacity = buf.capacity(); + size_t new_capacity = old_capacity + old_capacity / 2; + if (size > new_capacity) + new_capacity = size; + else if (new_capacity > max_size) + new_capacity = size > max_size ? size : max_size; + T* old_data = buf.data(); + T* new_data = self.alloc_.allocate(new_capacity); + // Suppress a bogus -Wstringop-overflow in gcc 13.1 (#3481). + detail::assume(buf.size() <= new_capacity); + // The following code doesn't throw, so the raw pointer above doesn't leak. + memcpy(new_data, old_data, buf.size() * sizeof(T)); + self.set(new_data, new_capacity); + // deallocate must not throw according to the standard, but even if it does, + // the buffer already uses the new storage and will deallocate it in + // destructor. 
+ if (old_data != self.store_) self.alloc_.deallocate(old_data, old_capacity); + } public: using value_type = T; @@ -855,7 +875,7 @@ class basic_memory_buffer final : public detail::buffer { FMT_CONSTEXPR20 explicit basic_memory_buffer( const Allocator& alloc = Allocator()) - : alloc_(alloc) { + : detail::buffer(grow), alloc_(alloc) { this->set(store_, SIZE); if (detail::is_constant_evaluated()) detail::fill_n(store_, SIZE, T()); } @@ -869,8 +889,7 @@ class basic_memory_buffer final : public detail::buffer { size_t size = other.size(), capacity = other.capacity(); if (data == other.store_) { this->set(store_, capacity); - detail::copy_str(other.store_, other.store_ + size, - detail::make_checked(store_, capacity)); + detail::copy(other.store_, other.store_ + size, store_); } else { this->set(data, capacity); // Set pointer to the inline array so that delete is not called @@ -882,21 +901,14 @@ class basic_memory_buffer final : public detail::buffer { } public: - /** - \rst - Constructs a :class:`fmt::basic_memory_buffer` object moving the content - of the other object to it. - \endrst - */ - FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept { + /// Constructs a `basic_memory_buffer` object moving the content of the other + /// object to it. + FMT_CONSTEXPR20 basic_memory_buffer(basic_memory_buffer&& other) noexcept + : detail::buffer(grow) { move(other); } - /** - \rst - Moves the content of the other ``basic_memory_buffer`` object to this one. - \endrst - */ + /// Moves the content of the other `basic_memory_buffer` object to this one. auto operator=(basic_memory_buffer&& other) noexcept -> basic_memory_buffer& { FMT_ASSERT(this != &other, ""); deallocate(); @@ -907,16 +919,13 @@ class basic_memory_buffer final : public detail::buffer { // Returns a copy of the allocator associated with this buffer. auto get_allocator() const -> Allocator { return alloc_; } - /** - Resizes the buffer to contain *count* elements. 
If T is a POD type new - elements may not be initialized. - */ + /// Resizes the buffer to contain `count` elements. If T is a POD type new + /// elements may not be initialized. FMT_CONSTEXPR20 void resize(size_t count) { this->try_resize(count); } - /** Increases the buffer capacity to *new_capacity*. */ + /// Increases the buffer capacity to `new_capacity`. void reserve(size_t new_capacity) { this->try_reserve(new_capacity); } - // Directly append data into the buffer using detail::buffer::append; template void append(const ContiguousRange& range) { @@ -924,61 +933,37 @@ class basic_memory_buffer final : public detail::buffer { } }; -template -FMT_CONSTEXPR20 void basic_memory_buffer::grow( - size_t size) { - detail::abort_fuzzing_if(size > 5000); - const size_t max_size = std::allocator_traits::max_size(alloc_); - size_t old_capacity = this->capacity(); - size_t new_capacity = old_capacity + old_capacity / 2; - if (size > new_capacity) - new_capacity = size; - else if (new_capacity > max_size) - new_capacity = size > max_size ? size : max_size; - T* old_data = this->data(); - T* new_data = - std::allocator_traits::allocate(alloc_, new_capacity); - // The following code doesn't throw, so the raw pointer above doesn't leak. - std::uninitialized_copy(old_data, old_data + this->size(), - detail::make_checked(new_data, new_capacity)); - this->set(new_data, new_capacity); - // deallocate must not throw according to the standard, but even if it does, - // the buffer already uses the new storage and will deallocate it in - // destructor. 
- if (old_data != store_) alloc_.deallocate(old_data, old_capacity); -} - using memory_buffer = basic_memory_buffer; template struct is_contiguous> : std::true_type { }; +FMT_END_EXPORT namespace detail { -#ifdef _WIN32 -FMT_API bool write_console(std::FILE* f, string_view text); -#endif +FMT_API auto write_console(int fd, string_view text) -> bool; FMT_API void print(std::FILE*, string_view); } // namespace detail -/** An error reported from a formatting function. */ -FMT_CLASS_API -class FMT_API format_error : public std::runtime_error { +FMT_BEGIN_EXPORT + +// Suppress a misleading warning in older versions of clang. +#if FMT_CLANG_VERSION +# pragma clang diagnostic ignored "-Wweak-vtables" +#endif + +/// An error reported from a formatting function. +class FMT_SO_VISIBILITY("default") format_error : public std::runtime_error { public: using std::runtime_error::runtime_error; - format_error(const format_error&) = default; - format_error& operator=(const format_error&) = default; - format_error(format_error&&) = default; - format_error& operator=(format_error&&) = default; - ~format_error() noexcept override FMT_MSC_DEFAULT; }; namespace detail_exported { #if FMT_USE_NONTYPE_TEMPLATE_ARGS template struct fixed_string { constexpr fixed_string(const Char (&str)[N]) { - detail::copy_str(static_cast(str), - str + N, data); + detail::copy(static_cast(str), + str + N, data); } Char data[N] = {}; }; @@ -993,12 +978,57 @@ constexpr auto compile_string_to_view(const Char (&s)[N]) return {s, N - (std::char_traits::to_int_type(s[N - 1]) == 0 ? 1 : 0)}; } template -constexpr auto compile_string_to_view(detail::std_string_view s) +constexpr auto compile_string_to_view(basic_string_view s) -> basic_string_view { - return {s.data(), s.size()}; + return s; } } // namespace detail_exported +// A generic formatting context with custom output iterator and character +// (code unit) support. 
Char is the format string code unit type which can be +// different from OutputIt::value_type. +template class generic_context { + private: + OutputIt out_; + basic_format_args args_; + detail::locale_ref loc_; + + public: + using char_type = Char; + using iterator = OutputIt; + using parse_context_type = basic_format_parse_context; + template using formatter_type = formatter; + + constexpr generic_context(OutputIt out, + basic_format_args ctx_args, + detail::locale_ref loc = {}) + : out_(out), args_(ctx_args), loc_(loc) {} + generic_context(generic_context&&) = default; + generic_context(const generic_context&) = delete; + void operator=(const generic_context&) = delete; + + constexpr auto arg(int id) const -> basic_format_arg { + return args_.get(id); + } + auto arg(basic_string_view name) -> basic_format_arg { + return args_.get(name); + } + FMT_CONSTEXPR auto arg_id(basic_string_view name) -> int { + return args_.get_id(name); + } + auto args() const -> const basic_format_args& { + return args_; + } + + FMT_CONSTEXPR auto out() -> iterator { return out_; } + + void advance_to(iterator it) { + if (!detail::is_back_insert_iterator()) out_ = it; + } + + FMT_CONSTEXPR auto locale() -> detail::locale_ref { return loc_; } +}; + class loc_value { private: basic_format_arg value_; @@ -1011,7 +1041,7 @@ class loc_value { loc_value(T) {} template auto visit(Visitor&& vis) -> decltype(vis(0)) { - return visit_format_arg(vis, value_); + return value_.visit(vis); } }; @@ -1044,7 +1074,9 @@ template class format_facet : public Locale::facet { } }; -FMT_BEGIN_DETAIL_NAMESPACE +FMT_END_EXPORT + +namespace detail { // Returns true if value is negative, false otherwise. // Same as `value < 0` but doesn't produce warnings if T is an unsigned type. 
@@ -1075,13 +1107,13 @@ using uint32_or_64_or_128_t = template using uint64_or_128_t = conditional_t() <= 64, uint64_t, uint128_t>; -#define FMT_POWERS_OF_10(factor) \ - factor * 10, (factor)*100, (factor)*1000, (factor)*10000, (factor)*100000, \ - (factor)*1000000, (factor)*10000000, (factor)*100000000, \ - (factor)*1000000000 +#define FMT_POWERS_OF_10(factor) \ + factor * 10, (factor) * 100, (factor) * 1000, (factor) * 10000, \ + (factor) * 100000, (factor) * 1000000, (factor) * 10000000, \ + (factor) * 100000000, (factor) * 1000000000 // Converts value in the range [0, 100) to a string. -constexpr const char* digits2(size_t value) { +constexpr auto digits2(size_t value) -> const char* { // GCC generates slightly better code when value is pointer-size. return &"0001020304050607080910111213141516171819" "2021222324252627282930313233343536373839" @@ -1091,11 +1123,11 @@ constexpr const char* digits2(size_t value) { } // Sign is a template parameter to workaround a bug in gcc 4.8. -template constexpr Char sign(Sign s) { +template constexpr auto sign(Sign s) -> Char { #if !FMT_GCC_VERSION || FMT_GCC_VERSION >= 604 static_assert(std::is_same::value, ""); #endif - return static_cast("\0-+ "[s]); + return static_cast(((' ' << 24) | ('+' << 16) | ('-' << 8)) >> (s * 8)); } template FMT_CONSTEXPR auto count_digits_fallback(T n) -> int { @@ -1143,9 +1175,7 @@ inline auto do_count_digits(uint64_t n) -> int { // except for n == 0 in which case count_digits returns 1. FMT_CONSTEXPR20 inline auto count_digits(uint64_t n) -> int { #ifdef FMT_BUILTIN_CLZLL - if (!is_constant_evaluated()) { - return do_count_digits(n); - } + if (!is_constant_evaluated()) return do_count_digits(n); #endif return count_digits_fallback(n); } @@ -1173,7 +1203,7 @@ FMT_CONSTEXPR auto count_digits(UInt n) -> int { FMT_INLINE auto do_count_digits(uint32_t n) -> int { // An optimization by Kendall Willets from https://bit.ly/3uOIQrB. // This increments the upper 32 bits (log10(T) - 1) when >= T is added. 
-# define FMT_INC(T) (((sizeof(# T) - 1ull) << 32) - T) +# define FMT_INC(T) (((sizeof(#T) - 1ull) << 32) - T) static constexpr uint64_t table[] = { FMT_INC(0), FMT_INC(0), FMT_INC(0), // 8 FMT_INC(10), FMT_INC(10), FMT_INC(10), // 64 @@ -1291,7 +1321,7 @@ FMT_CONSTEXPR inline auto format_decimal(Iterator out, UInt value, int size) // Buffer is large enough to hold all digits (digits10 + 1). Char buffer[digits10() + 1] = {}; auto end = format_decimal(buffer, value, size).end; - return {out, detail::copy_str_noinline(buffer, end, out)}; + return {out, detail::copy_noinline(buffer, end, out)}; } template @@ -1309,16 +1339,16 @@ FMT_CONSTEXPR auto format_uint(Char* buffer, UInt value, int num_digits, } template -inline auto format_uint(It out, UInt value, int num_digits, bool upper = false) - -> It { +FMT_CONSTEXPR inline auto format_uint(It out, UInt value, int num_digits, + bool upper = false) -> It { if (auto ptr = to_pointer(out, to_unsigned(num_digits))) { format_uint(ptr, value, num_digits, upper); return out; } // Buffer should be large enough to hold all digits (digits / BASE_BITS + 1). - char buffer[num_bits() / BASE_BITS + 1]; + char buffer[num_bits() / BASE_BITS + 1] = {}; format_uint(buffer, value, num_digits, upper); - return detail::copy_str_noinline(buffer, buffer + num_digits, out); + return detail::copy_noinline(buffer, buffer + num_digits, out); } // A converter from UTF-8 to UTF-16. @@ -1334,7 +1364,140 @@ class utf8_to_utf16 { auto str() const -> std::wstring { return {&buffer_[0], size()}; } }; +enum class to_utf8_error_policy { abort, replace }; + +// A converter from UTF-16/UTF-32 (host endian) to UTF-8. 
+template class to_utf8 { + private: + Buffer buffer_; + + public: + to_utf8() {} + explicit to_utf8(basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) { + static_assert(sizeof(WChar) == 2 || sizeof(WChar) == 4, + "Expect utf16 or utf32"); + if (!convert(s, policy)) + FMT_THROW(std::runtime_error(sizeof(WChar) == 2 ? "invalid utf16" + : "invalid utf32")); + } + operator string_view() const { return string_view(&buffer_[0], size()); } + auto size() const -> size_t { return buffer_.size() - 1; } + auto c_str() const -> const char* { return &buffer_[0]; } + auto str() const -> std::string { return std::string(&buffer_[0], size()); } + + // Performs conversion returning a bool instead of throwing exception on + // conversion error. This method may still throw in case of memory allocation + // error. + auto convert(basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { + if (!convert(buffer_, s, policy)) return false; + buffer_.push_back(0); + return true; + } + static auto convert(Buffer& buf, basic_string_view s, + to_utf8_error_policy policy = to_utf8_error_policy::abort) + -> bool { + for (auto p = s.begin(); p != s.end(); ++p) { + uint32_t c = static_cast(*p); + if (sizeof(WChar) == 2 && c >= 0xd800 && c <= 0xdfff) { + // Handle a surrogate pair. 
+ ++p; + if (p == s.end() || (c & 0xfc00) != 0xd800 || (*p & 0xfc00) != 0xdc00) { + if (policy == to_utf8_error_policy::abort) return false; + buf.append(string_view("\xEF\xBF\xBD")); + --p; + } else { + c = (c << 10) + static_cast(*p) - 0x35fdc00; + } + } else if (c < 0x80) { + buf.push_back(static_cast(c)); + } else if (c < 0x800) { + buf.push_back(static_cast(0xc0 | (c >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else if ((c >= 0x800 && c <= 0xd7ff) || (c >= 0xe000 && c <= 0xffff)) { + buf.push_back(static_cast(0xe0 | (c >> 12))); + buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else if (c >= 0x10000 && c <= 0x10ffff) { + buf.push_back(static_cast(0xf0 | (c >> 18))); + buf.push_back(static_cast(0x80 | ((c & 0x3ffff) >> 12))); + buf.push_back(static_cast(0x80 | ((c & 0xfff) >> 6))); + buf.push_back(static_cast(0x80 | (c & 0x3f))); + } else { + return false; + } + } + return true; + } +}; + +// Computes 128-bit result of multiplication of two 64-bit unsigned integers. +inline auto umul128(uint64_t x, uint64_t y) noexcept -> uint128_fallback { +#if FMT_USE_INT128 + auto p = static_cast(x) * static_cast(y); + return {static_cast(p >> 64), static_cast(p)}; +#elif defined(_MSC_VER) && defined(_M_X64) + auto hi = uint64_t(); + auto lo = _umul128(x, y, &hi); + return {hi, lo}; +#else + const uint64_t mask = static_cast(max_value()); + + uint64_t a = x >> 32; + uint64_t b = x & mask; + uint64_t c = y >> 32; + uint64_t d = y & mask; + + uint64_t ac = a * c; + uint64_t bc = b * c; + uint64_t ad = a * d; + uint64_t bd = b * d; + + uint64_t intermediate = (bd >> 32) + (ad & mask) + (bc & mask); + + return {ac + (intermediate >> 32) + (ad >> 32) + (bc >> 32), + (intermediate << 32) + (bd & mask)}; +#endif +} + namespace dragonbox { +// Computes floor(log10(pow(2, e))) for e in [-2620, 2620] using the method from +// https://fmt.dev/papers/Dragonbox.pdf#page=28, section 6.1. 
+inline auto floor_log10_pow2(int e) noexcept -> int { + FMT_ASSERT(e <= 2620 && e >= -2620, "too large exponent"); + static_assert((-1 >> 1) == -1, "right shift is not arithmetic"); + return (e * 315653) >> 20; +} + +inline auto floor_log2_pow10(int e) noexcept -> int { + FMT_ASSERT(e <= 1233 && e >= -1233, "too large exponent"); + return (e * 1741647) >> 19; +} + +// Computes upper 64 bits of multiplication of two 64-bit unsigned integers. +inline auto umul128_upper64(uint64_t x, uint64_t y) noexcept -> uint64_t { +#if FMT_USE_INT128 + auto p = static_cast(x) * static_cast(y); + return static_cast(p >> 64); +#elif defined(_MSC_VER) && defined(_M_X64) + return __umulh(x, y); +#else + return umul128(x, y).high(); +#endif +} + +// Computes upper 128 bits of multiplication of a 64-bit unsigned integer and a +// 128-bit unsigned integer. +inline auto umul192_upper128(uint64_t x, uint128_fallback y) noexcept + -> uint128_fallback { + uint128_fallback r = umul128(x, y.high()); + r += umul128_upper64(x, y.low()); + return r; +} + +FMT_API auto get_cached_power(int k) noexcept -> uint128_fallback; // Type-specific information that Dragonbox uses. template struct float_info; @@ -1358,7 +1521,7 @@ template <> struct float_info { static const int big_divisor = 1000; static const int small_divisor = 100; static const int min_k = -292; - static const int max_k = 326; + static const int max_k = 341; static const int shorter_interval_tie_lower_threshold = -77; static const int shorter_interval_tie_upper_threshold = -77; }; @@ -1388,14 +1551,14 @@ template FMT_API auto to_decimal(T x) noexcept -> decimal_fp; } // namespace dragonbox // Returns true iff Float has the implicit bit which is not stored. -template constexpr bool has_implicit_bit() { +template constexpr auto has_implicit_bit() -> bool { // An 80-bit FP number has a 64-bit significand an no implicit bit. return std::numeric_limits::digits != 64; } // Returns the number of significand bits stored in Float. 
The implicit bit is // not counted since it is not stored. -template constexpr int num_significand_bits() { +template constexpr auto num_significand_bits() -> int { // std::numeric_limits may not support __float128. return is_float128() ? 112 : (std::numeric_limits::digits - @@ -1405,8 +1568,8 @@ template constexpr int num_significand_bits() { template constexpr auto exponent_mask() -> typename dragonbox::float_info::carrier_uint { - using uint = typename dragonbox::float_info::carrier_uint; - return ((uint(1) << dragonbox::float_info::exponent_bits) - 1) + using float_uint = typename dragonbox::float_info::carrier_uint; + return ((float_uint(1) << dragonbox::float_info::exponent_bits) - 1) << num_significand_bits(); } template constexpr auto exponent_bias() -> int { @@ -1488,7 +1651,7 @@ using fp = basic_fp; // Normalizes the value converted from double and multiplied by (1 << SHIFT). template -FMT_CONSTEXPR basic_fp normalize(basic_fp value) { +FMT_CONSTEXPR auto normalize(basic_fp value) -> basic_fp { // Handle subnormals. const auto implicit_bit = F(1) << num_significand_bits(); const auto shifted_implicit_bit = implicit_bit << SHIFT; @@ -1505,7 +1668,7 @@ FMT_CONSTEXPR basic_fp normalize(basic_fp value) { } // Computes lhs * rhs / pow(2, 64) rounded to nearest with half-up tie breaking. -FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { +FMT_CONSTEXPR inline auto multiply(uint64_t lhs, uint64_t rhs) -> uint64_t { #if FMT_USE_INT128 auto product = static_cast<__uint128_t>(lhs) * rhs; auto f = static_cast(product >> 64); @@ -1522,188 +1685,36 @@ FMT_CONSTEXPR inline uint64_t multiply(uint64_t lhs, uint64_t rhs) { #endif } -FMT_CONSTEXPR inline fp operator*(fp x, fp y) { +FMT_CONSTEXPR inline auto operator*(fp x, fp y) -> fp { return {multiply(x.f, y.f), x.e + y.e + 64}; } -template struct basic_data { - // Normalized 64-bit significands of pow(10, k), for k = -348, -340, ..., 340. - // These are generated by support/compute-powers.py. 
- static constexpr uint64_t pow10_significands[87] = { - 0xfa8fd5a0081c0288, 0xbaaee17fa23ebf76, 0x8b16fb203055ac76, - 0xcf42894a5dce35ea, 0x9a6bb0aa55653b2d, 0xe61acf033d1a45df, - 0xab70fe17c79ac6ca, 0xff77b1fcbebcdc4f, 0xbe5691ef416bd60c, - 0x8dd01fad907ffc3c, 0xd3515c2831559a83, 0x9d71ac8fada6c9b5, - 0xea9c227723ee8bcb, 0xaecc49914078536d, 0x823c12795db6ce57, - 0xc21094364dfb5637, 0x9096ea6f3848984f, 0xd77485cb25823ac7, - 0xa086cfcd97bf97f4, 0xef340a98172aace5, 0xb23867fb2a35b28e, - 0x84c8d4dfd2c63f3b, 0xc5dd44271ad3cdba, 0x936b9fcebb25c996, - 0xdbac6c247d62a584, 0xa3ab66580d5fdaf6, 0xf3e2f893dec3f126, - 0xb5b5ada8aaff80b8, 0x87625f056c7c4a8b, 0xc9bcff6034c13053, - 0x964e858c91ba2655, 0xdff9772470297ebd, 0xa6dfbd9fb8e5b88f, - 0xf8a95fcf88747d94, 0xb94470938fa89bcf, 0x8a08f0f8bf0f156b, - 0xcdb02555653131b6, 0x993fe2c6d07b7fac, 0xe45c10c42a2b3b06, - 0xaa242499697392d3, 0xfd87b5f28300ca0e, 0xbce5086492111aeb, - 0x8cbccc096f5088cc, 0xd1b71758e219652c, 0x9c40000000000000, - 0xe8d4a51000000000, 0xad78ebc5ac620000, 0x813f3978f8940984, - 0xc097ce7bc90715b3, 0x8f7e32ce7bea5c70, 0xd5d238a4abe98068, - 0x9f4f2726179a2245, 0xed63a231d4c4fb27, 0xb0de65388cc8ada8, - 0x83c7088e1aab65db, 0xc45d1df942711d9a, 0x924d692ca61be758, - 0xda01ee641a708dea, 0xa26da3999aef774a, 0xf209787bb47d6b85, - 0xb454e4a179dd1877, 0x865b86925b9bc5c2, 0xc83553c5c8965d3d, - 0x952ab45cfa97a0b3, 0xde469fbd99a05fe3, 0xa59bc234db398c25, - 0xf6c69a72a3989f5c, 0xb7dcbf5354e9bece, 0x88fcf317f22241e2, - 0xcc20ce9bd35c78a5, 0x98165af37b2153df, 0xe2a0b5dc971f303a, - 0xa8d9d1535ce3b396, 0xfb9b7cd9a4a7443c, 0xbb764c4ca7a44410, - 0x8bab8eefb6409c1a, 0xd01fef10a657842c, 0x9b10a4e5e9913129, - 0xe7109bfba19c0c9d, 0xac2820d9623bf429, 0x80444b5e7aa7cf85, - 0xbf21e44003acdd2d, 0x8e679c2f5e44ff8f, 0xd433179d9c8cb841, - 0x9e19db92b4e31ba9, 0xeb96bf6ebadf77d9, 0xaf87023b9bf0ee6b, - }; - -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 -# pragma GCC diagnostic push -# pragma GCC diagnostic ignored "-Wnarrowing" -#endif - // 
Binary exponents of pow(10, k), for k = -348, -340, ..., 340, corresponding - // to significands above. - static constexpr int16_t pow10_exponents[87] = { - -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980, -954, - -927, -901, -874, -847, -821, -794, -768, -741, -715, -688, -661, - -635, -608, -582, -555, -529, -502, -475, -449, -422, -396, -369, - -343, -316, -289, -263, -236, -210, -183, -157, -130, -103, -77, - -50, -24, 3, 30, 56, 83, 109, 136, 162, 189, 216, - 242, 269, 295, 322, 348, 375, 402, 428, 455, 481, 508, - 534, 561, 588, 614, 641, 667, 694, 720, 747, 774, 800, - 827, 853, 880, 907, 933, 960, 986, 1013, 1039, 1066}; -#if FMT_GCC_VERSION && FMT_GCC_VERSION < 409 -# pragma GCC diagnostic pop -#endif - - static constexpr uint64_t power_of_10_64[20] = { - 1, FMT_POWERS_OF_10(1ULL), FMT_POWERS_OF_10(1000000000ULL), - 10000000000000000000ULL}; -}; - -#if FMT_CPLUSPLUS < 201703L -template constexpr uint64_t basic_data::pow10_significands[]; -template constexpr int16_t basic_data::pow10_exponents[]; -template constexpr uint64_t basic_data::power_of_10_64[]; -#endif - -// This is a struct rather than an alias to avoid shadowing warnings in gcc. -struct data : basic_data<> {}; - -// Returns a cached power of 10 `c_k = c_k.f * pow(2, c_k.e)` such that its -// (binary) exponent satisfies `min_exponent <= c_k.e <= min_exponent + 28`. -FMT_CONSTEXPR inline fp get_cached_power(int min_exponent, - int& pow10_exponent) { - const int shift = 32; - // log10(2) = 0x0.4d104d427de7fbcc... - const int64_t significand = 0x4d104d427de7fbcc; - int index = static_cast( - ((min_exponent + fp::num_significand_bits - 1) * (significand >> shift) + - ((int64_t(1) << shift) - 1)) // ceil - >> 32 // arithmetic shift - ); - // Decimal exponent of the first (smallest) cached power of 10. - const int first_dec_exp = -348; - // Difference between 2 consecutive decimal exponents in cached powers of 10. 
- const int dec_exp_step = 8; - index = (index - first_dec_exp - 1) / dec_exp_step + 1; - pow10_exponent = first_dec_exp + index * dec_exp_step; - // Using *(x + index) instead of x[index] avoids an issue with some compilers - // using the EDG frontend (e.g. nvhpc/22.3 in C++17 mode). - return {*(data::pow10_significands + index), - *(data::pow10_exponents + index)}; -} - -#ifndef _MSC_VER -# define FMT_SNPRINTF snprintf -#else -FMT_API auto fmt_snprintf(char* buf, size_t size, const char* fmt, ...) -> int; -# define FMT_SNPRINTF fmt_snprintf -#endif // _MSC_VER - -// Formats a floating-point number with snprintf using the hexfloat format. -template -auto snprintf_float(T value, int precision, float_specs specs, - buffer& buf) -> int { - // Buffer capacity must be non-zero, otherwise MSVC's vsnprintf_s will fail. - FMT_ASSERT(buf.capacity() > buf.size(), "empty buffer"); - FMT_ASSERT(specs.format == float_format::hex, ""); - static_assert(!std::is_same::value, ""); - - // Build the format string. - char format[7]; // The longest format is "%#.*Le". - char* format_ptr = format; - *format_ptr++ = '%'; - if (specs.showpoint) *format_ptr++ = '#'; - if (precision >= 0) { - *format_ptr++ = '.'; - *format_ptr++ = '*'; - } - if (std::is_same()) *format_ptr++ = 'L'; - *format_ptr++ = specs.upper ? 'A' : 'a'; - *format_ptr = '\0'; - - // Format using snprintf. - auto offset = buf.size(); - for (;;) { - auto begin = buf.data() + offset; - auto capacity = buf.capacity() - offset; - abort_fuzzing_if(precision > 100000); - // Suppress the warning about a nonliteral format string. - // Cannot use auto because of a bug in MinGW (#1532). - int (*snprintf_ptr)(char*, size_t, const char*, ...) = FMT_SNPRINTF; - int result = precision >= 0 - ? snprintf_ptr(begin, capacity, format, precision, value) - : snprintf_ptr(begin, capacity, format, value); - if (result < 0) { - // The buffer will grow exponentially. 
- buf.try_reserve(buf.capacity() + 1); - continue; - } - auto size = to_unsigned(result); - // Size equal to capacity means that the last character was truncated. - if (size < capacity) { - buf.try_resize(size + offset); - return 0; - } - buf.try_reserve(size + offset + 1); // Add 1 for the terminating '\0'. - } -} - -template +template () == num_bits()> using convert_float_result = - conditional_t::value || - std::numeric_limits::digits == - std::numeric_limits::digits, - double, T>; + conditional_t::value || doublish, double, T>; template constexpr auto convert_float(T value) -> convert_float_result { return static_cast>(value); } -template -FMT_NOINLINE FMT_CONSTEXPR auto fill(OutputIt it, size_t n, - const fill_t& fill) -> OutputIt { +template +FMT_NOINLINE FMT_CONSTEXPR auto fill(OutputIt it, size_t n, const fill_t& fill) + -> OutputIt { auto fill_size = fill.size(); - if (fill_size == 1) return detail::fill_n(it, n, fill[0]); - auto data = fill.data(); - for (size_t i = 0; i < n; ++i) - it = copy_str(data, data + fill_size, it); + if (fill_size == 1) return detail::fill_n(it, n, fill.template get()); + if (const Char* data = fill.template data()) { + for (size_t i = 0; i < n; ++i) it = copy(data, data + fill_size, it); + } return it; } // Writes the output of f, padded according to format specifications in specs. // size: output size in code units. // width: output display width in (terminal) column positions. 
-template -FMT_CONSTEXPR auto write_padded(OutputIt out, - const basic_format_specs& specs, +FMT_CONSTEXPR auto write_padded(OutputIt out, const format_specs& specs, size_t size, size_t width, F&& f) -> OutputIt { static_assert(align == align::left || align == align::right, ""); unsigned spec_width = to_unsigned(specs.width); @@ -1714,33 +1725,32 @@ FMT_CONSTEXPR auto write_padded(OutputIt out, size_t left_padding = padding >> shifts[specs.align]; size_t right_padding = padding - left_padding; auto it = reserve(out, size + padding * specs.fill.size()); - if (left_padding != 0) it = fill(it, left_padding, specs.fill); + if (left_padding != 0) it = fill(it, left_padding, specs.fill); it = f(it); - if (right_padding != 0) it = fill(it, right_padding, specs.fill); + if (right_padding != 0) it = fill(it, right_padding, specs.fill); return base_iterator(out, it); } -template -constexpr auto write_padded(OutputIt out, const basic_format_specs& specs, +constexpr auto write_padded(OutputIt out, const format_specs& specs, size_t size, F&& f) -> OutputIt { - return write_padded(out, specs, size, size, f); + return write_padded(out, specs, size, size, f); } -template +template FMT_CONSTEXPR auto write_bytes(OutputIt out, string_view bytes, - const basic_format_specs& specs) - -> OutputIt { - return write_padded( + const format_specs& specs = {}) -> OutputIt { + return write_padded( out, specs, bytes.size(), [bytes](reserve_iterator it) { const char* data = bytes.data(); - return copy_str(data, data + bytes.size(), it); + return copy(data, data + bytes.size(), it); }); } template -auto write_ptr(OutputIt out, UIntPtr value, - const basic_format_specs* specs) -> OutputIt { +auto write_ptr(OutputIt out, UIntPtr value, const format_specs* specs) + -> OutputIt { int num_digits = count_digits<4>(value); auto size = to_unsigned(num_digits) + size_t(2); auto write = [=](reserve_iterator it) { @@ -1748,7 +1758,7 @@ auto write_ptr(OutputIt out, UIntPtr value, *it++ = static_cast('x'); 
return format_uint<4, Char>(it, value, num_digits); }; - return specs ? write_padded(out, *specs, size, write) + return specs ? write_padded(out, *specs, size, write) : base_iterator(out, write(reserve(out, size))); } @@ -1766,17 +1776,11 @@ template struct find_escape_result { uint32_t cp; }; -template -using make_unsigned_char = - typename conditional_t::value, - std::make_unsigned, - type_identity>::type; - template auto find_escape(const Char* begin, const Char* end) -> find_escape_result { for (; begin != end; ++begin) { - uint32_t cp = static_cast>(*begin); + uint32_t cp = static_cast>(*begin); if (const_check(sizeof(Char) == 1) && cp >= 0x80) continue; if (needs_escape(cp)) return {begin, begin + 1, cp}; } @@ -1785,7 +1789,7 @@ auto find_escape(const Char* begin, const Char* end) inline auto find_escape(const char* begin, const char* end) -> find_escape_result { - if (!is_utf8()) return find_escape(begin, end); + if (!use_utf8()) return find_escape(begin, end); auto result = find_escape_result{end, nullptr, 0}; for_each_codepoint(string_view(begin, to_unsigned(end - begin)), [&](uint32_t cp, string_view sv) { @@ -1802,7 +1806,7 @@ inline auto find_escape(const char* begin, const char* end) [] { \ /* Use the hidden visibility as a workaround for a GCC bug (#1973). */ \ /* Use a macro-like name to avoid shadowing warnings. */ \ - struct FMT_GCC_VISIBILITY_HIDDEN FMT_COMPILE_STRING : base { \ + struct FMT_VISIBILITY("hidden") FMT_COMPILE_STRING : base { \ using char_type FMT_MAYBE_UNUSED = fmt::remove_cvref_t; \ FMT_MAYBE_UNUSED FMT_CONSTEXPR explicit \ operator fmt::basic_string_view() const { \ @@ -1813,14 +1817,12 @@ inline auto find_escape(const char* begin, const char* end) }() /** - \rst - Constructs a compile-time format string from a string literal *s*. - - **Example**:: - - // A compile-time error because 'd' is an invalid specifier for strings. 
- std::string s = fmt::format(FMT_STRING("{:d}"), "foo"); - \endrst + * Constructs a compile-time format string from a string literal `s`. + * + * **Example**: + * + * // A compile-time error because 'd' is an invalid specifier for strings. + * std::string s = fmt::format(FMT_STRING("{:d}"), "foo"); */ #define FMT_STRING(s) FMT_STRING_IMPL(s, fmt::detail::compile_string, ) @@ -1831,7 +1833,7 @@ auto write_codepoint(OutputIt out, char prefix, uint32_t cp) -> OutputIt { Char buf[width]; fill_n(buf, width, static_cast('0')); format_uint<4>(buf, cp, width); - return copy_str(buf, buf + width, out); + return copy(buf, buf + width, out); } template @@ -1859,17 +1861,11 @@ auto write_escaped_cp(OutputIt out, const find_escape_result& escape) *out++ = static_cast('\\'); break; default: - if (is_utf8()) { - if (escape.cp < 0x100) { - return write_codepoint<2, Char>(out, 'x', escape.cp); - } - if (escape.cp < 0x10000) { - return write_codepoint<4, Char>(out, 'u', escape.cp); - } - if (escape.cp < 0x110000) { - return write_codepoint<8, Char>(out, 'U', escape.cp); - } - } + if (escape.cp < 0x100) return write_codepoint<2, Char>(out, 'x', escape.cp); + if (escape.cp < 0x10000) + return write_codepoint<4, Char>(out, 'u', escape.cp); + if (escape.cp < 0x110000) + return write_codepoint<8, Char>(out, 'U', escape.cp); for (Char escape_char : basic_string_view( escape.begin, to_unsigned(escape.end - escape.begin))) { out = write_codepoint<2, Char>(out, 'x', @@ -1888,7 +1884,7 @@ auto write_escaped_string(OutputIt out, basic_string_view str) auto begin = str.begin(), end = str.end(); do { auto escape = find_escape(begin, end); - out = copy_str(begin, escape.begin, out); + out = copy(begin, escape.begin, out); begin = escape.end; if (!begin) break; out = write_escaped_cp(out, escape); @@ -1899,11 +1895,13 @@ auto write_escaped_string(OutputIt out, basic_string_view str) template auto write_escaped_char(OutputIt out, Char v) -> OutputIt { + Char v_array[1] = {v}; *out++ = 
static_cast('\''); if ((needs_escape(static_cast(v)) && v != static_cast('"')) || v == static_cast('\'')) { - out = write_escaped_cp( - out, find_escape_result{&v, &v + 1, static_cast(v)}); + out = write_escaped_cp(out, + find_escape_result{v_array, v_array + 1, + static_cast(v)}); } else { *out++ = v; } @@ -1913,22 +1911,23 @@ auto write_escaped_char(OutputIt out, Char v) -> OutputIt { template FMT_CONSTEXPR auto write_char(OutputIt out, Char value, - const basic_format_specs& specs) - -> OutputIt { + const format_specs& specs) -> OutputIt { bool is_debug = specs.type == presentation_type::debug; - return write_padded(out, specs, 1, [=](reserve_iterator it) { + return write_padded(out, specs, 1, [=](reserve_iterator it) { if (is_debug) return write_escaped_char(it, value); *it++ = value; return it; }); } template -FMT_CONSTEXPR auto write(OutputIt out, Char value, - const basic_format_specs& specs, +FMT_CONSTEXPR auto write(OutputIt out, Char value, const format_specs& specs, locale_ref loc = {}) -> OutputIt { + // char is formatted as unsigned char for consistency across platforms. + using unsigned_type = + conditional_t::value, unsigned char, unsigned>; return check_char_specs(specs) - ? write_char(out, value, specs) - : write(out, static_cast(value), specs, loc); + ? write_char(out, value, specs) + : write(out, static_cast(value), specs, loc); } // Data for write_int that doesn't depend on output iterator type. It is used to @@ -1938,7 +1937,7 @@ template struct write_int_data { size_t padding; FMT_CONSTEXPR write_int_data(int num_digits, unsigned prefix, - const basic_format_specs& specs) + const format_specs& specs) : size((prefix >> 24) + to_unsigned(num_digits)), padding(0) { if (specs.align == align::numeric) { auto width = to_unsigned(specs.width); @@ -1957,10 +1956,10 @@ template struct write_int_data { // // where are written by write_digits(it). // prefix contains chars in three lower bytes and the size in the fourth byte. 
-template +template FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, int num_digits, unsigned prefix, - const basic_format_specs& specs, + const format_specs& specs, W write_digits) -> OutputIt { // Slightly faster check for specs.width == 0 && specs.precision == -1. if ((specs.width | (specs.precision + 1)) == 0) { @@ -1972,7 +1971,7 @@ FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, int num_digits, return base_iterator(out, write_digits(it)); } auto data = write_int_data(num_digits, prefix, specs); - return write_padded( + return write_padded( out, specs, data.size, [=](reserve_iterator it) { for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) *it++ = static_cast(p & 0xff); @@ -1990,10 +1989,10 @@ template class digit_grouping { std::string::const_iterator group; int pos; }; - next_state initial_state() const { return {grouping_.begin(), 0}; } + auto initial_state() const -> next_state { return {grouping_.begin(), 0}; } // Returns the next digit group separator position. - int next(next_state& state) const { + auto next(next_state& state) const -> int { if (thousands_sep_.empty()) return max_value(); if (state.group == grouping_.end()) return state.pos += grouping_.back(); if (*state.group <= 0 || *state.group == max_value()) @@ -2012,9 +2011,9 @@ template class digit_grouping { digit_grouping(std::string grouping, std::basic_string sep) : grouping_(std::move(grouping)), thousands_sep_(std::move(sep)) {} - bool has_separator() const { return !thousands_sep_.empty(); } + auto has_separator() const -> bool { return !thousands_sep_.empty(); } - int count_separators(int num_digits) const { + auto count_separators(int num_digits) const -> int { int count = 0; auto state = initial_state(); while (num_digits > next(state)) ++count; @@ -2023,7 +2022,7 @@ template class digit_grouping { // Applies grouping to digits and write the output to out. 
template - Out apply(Out out, basic_string_view digits) const { + auto apply(Out out, basic_string_view digits) const -> Out { auto num_digits = static_cast(digits.size()); auto separators = basic_memory_buffer(); separators.push_back(0); @@ -2035,9 +2034,8 @@ template class digit_grouping { for (int i = 0, sep_index = static_cast(separators.size() - 1); i < num_digits; ++i) { if (num_digits - i == separators[sep_index]) { - out = - copy_str(thousands_sep_.data(), - thousands_sep_.data() + thousands_sep_.size(), out); + out = copy(thousands_sep_.data(), + thousands_sep_.data() + thousands_sep_.size(), out); --sep_index; } *out++ = static_cast(digits[to_unsigned(i)]); @@ -2046,41 +2044,71 @@ template class digit_grouping { } }; +FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { + prefix |= prefix != 0 ? value << 8 : value; + prefix += (1u + (value > 0xff ? 1 : 0)) << 24; +} + // Writes a decimal integer with digit grouping. template auto write_int(OutputIt out, UInt value, unsigned prefix, - const basic_format_specs& specs, - const digit_grouping& grouping) -> OutputIt { + const format_specs& specs, const digit_grouping& grouping) + -> OutputIt { static_assert(std::is_same, UInt>::value, ""); - int num_digits = count_digits(value); - char digits[40]; - format_decimal(digits, value, num_digits); - unsigned size = to_unsigned((prefix != 0 ? 1 : 0) + num_digits + - grouping.count_separators(num_digits)); - return write_padded( + int num_digits = 0; + auto buffer = memory_buffer(); + switch (specs.type) { + default: + FMT_ASSERT(false, ""); + FMT_FALLTHROUGH; + case presentation_type::none: + case presentation_type::dec: + num_digits = count_digits(value); + format_decimal(appender(buffer), value, num_digits); + break; + case presentation_type::hex: + if (specs.alt) + prefix_append(prefix, unsigned(specs.upper ? 
'X' : 'x') << 8 | '0'); + num_digits = count_digits<4>(value); + format_uint<4, char>(appender(buffer), value, num_digits, specs.upper); + break; + case presentation_type::oct: + num_digits = count_digits<3>(value); + // Octal prefix '0' is counted as a digit, so only add it if precision + // is not greater than the number of digits. + if (specs.alt && specs.precision <= num_digits && value != 0) + prefix_append(prefix, '0'); + format_uint<3, char>(appender(buffer), value, num_digits); + break; + case presentation_type::bin: + if (specs.alt) + prefix_append(prefix, unsigned(specs.upper ? 'B' : 'b') << 8 | '0'); + num_digits = count_digits<1>(value); + format_uint<1, char>(appender(buffer), value, num_digits); + break; + case presentation_type::chr: + return write_char(out, static_cast(value), specs); + } + + unsigned size = (prefix != 0 ? prefix >> 24 : 0) + to_unsigned(num_digits) + + to_unsigned(grouping.count_separators(num_digits)); + return write_padded( out, specs, size, size, [&](reserve_iterator it) { - if (prefix != 0) { - char sign = static_cast(prefix); - *it++ = static_cast(sign); - } - return grouping.apply(it, string_view(digits, to_unsigned(num_digits))); + for (unsigned p = prefix & 0xffffff; p != 0; p >>= 8) + *it++ = static_cast(p & 0xff); + return grouping.apply(it, string_view(buffer.data(), buffer.size())); }); } // Writes a localized value. FMT_API auto write_loc(appender out, loc_value value, const format_specs& specs, locale_ref loc) -> bool; -template -inline auto write_loc(OutputIt, loc_value, const basic_format_specs&, - locale_ref) -> bool { +template +inline auto write_loc(OutputIt, loc_value, const format_specs&, locale_ref) + -> bool { return false; } -FMT_CONSTEXPR inline void prefix_append(unsigned& prefix, unsigned value) { - prefix |= prefix != 0 ? value << 8 : value; - prefix += (1u + (value > 0xff ? 
1 : 0)) << 24; -} - template struct write_int_arg { UInt abs_value; unsigned prefix; @@ -2103,8 +2131,8 @@ FMT_CONSTEXPR auto make_write_int_arg(T value, sign_t sign) } template struct loc_writer { - buffer_appender out; - const basic_format_specs& specs; + basic_appender out; + const format_specs& specs; std::basic_string sep; std::string grouping; std::basic_string decimal_point; @@ -2117,97 +2145,94 @@ template struct loc_writer { return true; } - template ::value)> + template ::value)> auto operator()(T) -> bool { return false; } - - auto operator()(...) -> bool { return false; } }; template FMT_CONSTEXPR FMT_INLINE auto write_int(OutputIt out, write_int_arg arg, - const basic_format_specs& specs, - locale_ref) -> OutputIt { + const format_specs& specs, locale_ref) + -> OutputIt { static_assert(std::is_same>::value, ""); auto abs_value = arg.abs_value; auto prefix = arg.prefix; switch (specs.type) { + default: + FMT_ASSERT(false, ""); + FMT_FALLTHROUGH; case presentation_type::none: case presentation_type::dec: { - auto num_digits = count_digits(abs_value); - return write_int( + int num_digits = count_digits(abs_value); + return write_int( out, num_digits, prefix, specs, [=](reserve_iterator it) { return format_decimal(it, abs_value, num_digits).end; }); } - case presentation_type::hex_lower: - case presentation_type::hex_upper: { - bool upper = specs.type == presentation_type::hex_upper; + case presentation_type::hex: { if (specs.alt) - prefix_append(prefix, unsigned(upper ? 'X' : 'x') << 8 | '0'); + prefix_append(prefix, unsigned(specs.upper ? 
'X' : 'x') << 8 | '0'); int num_digits = count_digits<4>(abs_value); - return write_int( + return write_int( out, num_digits, prefix, specs, [=](reserve_iterator it) { - return format_uint<4, Char>(it, abs_value, num_digits, upper); + return format_uint<4, Char>(it, abs_value, num_digits, specs.upper); }); } - case presentation_type::bin_lower: - case presentation_type::bin_upper: { - bool upper = specs.type == presentation_type::bin_upper; - if (specs.alt) - prefix_append(prefix, unsigned(upper ? 'B' : 'b') << 8 | '0'); - int num_digits = count_digits<1>(abs_value); - return write_int(out, num_digits, prefix, specs, - [=](reserve_iterator it) { - return format_uint<1, Char>(it, abs_value, num_digits); - }); - } case presentation_type::oct: { int num_digits = count_digits<3>(abs_value); // Octal prefix '0' is counted as a digit, so only add it if precision // is not greater than the number of digits. if (specs.alt && specs.precision <= num_digits && abs_value != 0) prefix_append(prefix, '0'); - return write_int(out, num_digits, prefix, specs, - [=](reserve_iterator it) { - return format_uint<3, Char>(it, abs_value, num_digits); - }); + return write_int( + out, num_digits, prefix, specs, [=](reserve_iterator it) { + return format_uint<3, Char>(it, abs_value, num_digits); + }); + } + case presentation_type::bin: { + if (specs.alt) + prefix_append(prefix, unsigned(specs.upper ? 
'B' : 'b') << 8 | '0'); + int num_digits = count_digits<1>(abs_value); + return write_int( + out, num_digits, prefix, specs, [=](reserve_iterator it) { + return format_uint<1, Char>(it, abs_value, num_digits); + }); } case presentation_type::chr: - return write_char(out, static_cast(abs_value), specs); - default: - throw_format_error("invalid type specifier"); + return write_char(out, static_cast(abs_value), specs); } - return out; } template -FMT_CONSTEXPR FMT_NOINLINE auto write_int_noinline( - OutputIt out, write_int_arg arg, const basic_format_specs& specs, - locale_ref loc) -> OutputIt { - return write_int(out, arg, specs, loc); +FMT_CONSTEXPR FMT_NOINLINE auto write_int_noinline(OutputIt out, + write_int_arg arg, + const format_specs& specs, + locale_ref loc) -> OutputIt { + return write_int(out, arg, specs, loc); } -template ::value && !std::is_same::value && - std::is_same>::value)> -FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value, - const basic_format_specs& specs, - locale_ref loc) -> OutputIt { + !std::is_same::value)> +FMT_CONSTEXPR FMT_INLINE auto write(basic_appender out, T value, + const format_specs& specs, locale_ref loc) + -> basic_appender { if (specs.localized && write_loc(out, value, specs, loc)) return out; - return write_int_noinline(out, make_write_int_arg(value, specs.sign), specs, - loc); + return write_int_noinline(out, make_write_int_arg(value, specs.sign), + specs, loc); } // An inlined version of write used in format string compilation. 
template ::value && !std::is_same::value && - !std::is_same>::value)> + !std::is_same::value && + !std::is_same>::value)> FMT_CONSTEXPR FMT_INLINE auto write(OutputIt out, T value, - const basic_format_specs& specs, - locale_ref loc) -> OutputIt { + const format_specs& specs, locale_ref loc) + -> OutputIt { if (specs.localized && write_loc(out, value, specs, loc)) return out; - return write_int(out, make_write_int_arg(value, specs.sign), specs, loc); + return write_int(out, make_write_int_arg(value, specs.sign), specs, + loc); } // An output iterator that counts the number of objects written to it and @@ -2229,63 +2254,64 @@ class counting_iterator { FMT_CONSTEXPR counting_iterator() : count_(0) {} - FMT_CONSTEXPR size_t count() const { return count_; } + FMT_CONSTEXPR auto count() const -> size_t { return count_; } - FMT_CONSTEXPR counting_iterator& operator++() { + FMT_CONSTEXPR auto operator++() -> counting_iterator& { ++count_; return *this; } - FMT_CONSTEXPR counting_iterator operator++(int) { + FMT_CONSTEXPR auto operator++(int) -> counting_iterator { auto it = *this; ++*this; return it; } - FMT_CONSTEXPR friend counting_iterator operator+(counting_iterator it, - difference_type n) { + FMT_CONSTEXPR friend auto operator+(counting_iterator it, difference_type n) + -> counting_iterator { it.count_ += static_cast(n); return it; } - FMT_CONSTEXPR value_type operator*() const { return {}; } + FMT_CONSTEXPR auto operator*() const -> value_type { return {}; } }; template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view s, - const basic_format_specs& specs) -> OutputIt { + const format_specs& specs) -> OutputIt { auto data = s.data(); auto size = s.size(); if (specs.precision >= 0 && to_unsigned(specs.precision) < size) size = code_point_index(s, to_unsigned(specs.precision)); bool is_debug = specs.type == presentation_type::debug; size_t width = 0; + + if (is_debug) size = write_escaped_string(counting_iterator{}, s).count(); + if (specs.width != 0) { if 
(is_debug) - width = write_escaped_string(counting_iterator{}, s).count(); + width = size; else width = compute_width(basic_string_view(data, size)); } - return write_padded(out, specs, size, width, - [=](reserve_iterator it) { - if (is_debug) return write_escaped_string(it, s); - return copy_str(data, data + size, it); - }); + return write_padded(out, specs, size, width, + [=](reserve_iterator it) { + if (is_debug) return write_escaped_string(it, s); + return copy(data, data + size, it); + }); } template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view> s, - const basic_format_specs& specs, locale_ref) - -> OutputIt { - check_string_type_spec(specs.type); - return write(out, s, specs); + const format_specs& specs, locale_ref) -> OutputIt { + return write(out, s, specs); } template -FMT_CONSTEXPR auto write(OutputIt out, const Char* s, - const basic_format_specs& specs, locale_ref) - -> OutputIt { - return check_cstring_type_spec(specs.type) - ? write(out, basic_string_view(s), specs, {}) - : write_ptr(out, bit_cast(s), &specs); +FMT_CONSTEXPR auto write(OutputIt out, const Char* s, const format_specs& specs, + locale_ref) -> OutputIt { + if (specs.type == presentation_type::pointer) + return write_ptr(out, bit_cast(s), &specs); + if (!s) report_error("string pointer is null"); + return write(out, basic_string_view(s), specs, {}); } template OutputIt { if (negative) abs_value = ~abs_value + 1; int num_digits = count_digits(abs_value); auto size = (negative ? 1 : 0) + static_cast(num_digits); - auto it = reserve(out, size); - if (auto ptr = to_pointer(it, size)) { + if (auto ptr = to_pointer(out, size)) { if (negative) *ptr++ = static_cast('-'); format_decimal(ptr, abs_value, num_digits); return out; } - if (negative) *it++ = static_cast('-'); - it = format_decimal(it, abs_value, num_digits).end; - return base_iterator(out, it); + if (negative) *out++ = static_cast('-'); + return format_decimal(out, abs_value, num_digits).end; +} + +// DEPRECATED! 
+template +FMT_CONSTEXPR auto parse_align(const Char* begin, const Char* end, + format_specs& specs) -> const Char* { + FMT_ASSERT(begin != end, ""); + auto align = align::none; + auto p = begin + code_point_length(begin); + if (end - p <= 0) p = begin; + for (;;) { + switch (to_ascii(*p)) { + case '<': + align = align::left; + break; + case '>': + align = align::right; + break; + case '^': + align = align::center; + break; + } + if (align != align::none) { + if (p != begin) { + auto c = *begin; + if (c == '}') return begin; + if (c == '{') { + report_error("invalid fill character '{'"); + return begin; + } + specs.fill = basic_string_view(begin, to_unsigned(p - begin)); + begin = p + 1; + } else { + ++begin; + } + break; + } else if (p == begin) { + break; + } + p = begin; + } + specs.align = align; + return begin; +} + +// A floating-point presentation format. +enum class float_format : unsigned char { + general, // General: exponent notation or fixed point based on magnitude. + exp, // Exponent notation with the default precision of 6, e.g. 1.2e-3. + fixed // Fixed point with the default precision of 6, e.g. 0.0012. +}; + +struct float_specs { + int precision; + float_format format : 8; + sign_t sign : 8; + bool locale : 1; + bool binary32 : 1; + bool showpoint : 1; +}; + +// DEPRECATED! 
+FMT_CONSTEXPR inline auto parse_float_type_spec(const format_specs& specs) + -> float_specs { + auto result = float_specs(); + result.showpoint = specs.alt; + result.locale = specs.localized; + switch (specs.type) { + default: + FMT_FALLTHROUGH; + case presentation_type::none: + result.format = float_format::general; + break; + case presentation_type::exp: + result.format = float_format::exp; + result.showpoint |= specs.precision != 0; + break; + case presentation_type::fixed: + result.format = float_format::fixed; + result.showpoint |= specs.precision != 0; + break; + case presentation_type::general: + result.format = float_format::general; + break; + } + return result; } template FMT_CONSTEXPR20 auto write_nonfinite(OutputIt out, bool isnan, - basic_format_specs specs, - const float_specs& fspecs) -> OutputIt { + format_specs specs, sign_t sign) + -> OutputIt { auto str = - isnan ? (fspecs.upper ? "NAN" : "nan") : (fspecs.upper ? "INF" : "inf"); + isnan ? (specs.upper ? "NAN" : "nan") : (specs.upper ? "INF" : "inf"); constexpr size_t str_size = 3; - auto sign = fspecs.sign; auto size = str_size + (sign ? 1 : 0); // Replace '0'-padding with space for non-finite values. const bool is_zero_fill = - specs.fill.size() == 1 && *specs.fill.data() == static_cast('0'); - if (is_zero_fill) specs.fill[0] = static_cast(' '); - return write_padded(out, specs, size, [=](reserve_iterator it) { - if (sign) *it++ = detail::sign(sign); - return copy_str(str, str + str_size, it); - }); + specs.fill.size() == 1 && specs.fill.template get() == '0'; + if (is_zero_fill) specs.fill = ' '; + return write_padded(out, specs, size, + [=](reserve_iterator it) { + if (sign) *it++ = detail::sign(sign); + return copy(str, str + str_size, it); + }); } // A decimal floating-point number significand * pow(10, exp). 
@@ -2347,7 +2457,7 @@ inline auto get_significand_size(const dragonbox::decimal_fp& f) -> int { template constexpr auto write_significand(OutputIt out, const char* significand, int significand_size) -> OutputIt { - return copy_str(significand, significand + significand_size, out); + return copy(significand, significand + significand_size, out); } template inline auto write_significand(OutputIt out, UInt significand, @@ -2400,19 +2510,19 @@ inline auto write_significand(OutputIt out, UInt significand, Char buffer[digits10() + 2]; auto end = write_significand(buffer, significand, significand_size, integral_size, decimal_point); - return detail::copy_str_noinline(buffer, end, out); + return detail::copy_noinline(buffer, end, out); } template FMT_CONSTEXPR auto write_significand(OutputIt out, const char* significand, int significand_size, int integral_size, Char decimal_point) -> OutputIt { - out = detail::copy_str_noinline(significand, - significand + integral_size, out); + out = detail::copy_noinline(significand, significand + integral_size, + out); if (!decimal_point) return out; *out++ = decimal_point; - return detail::copy_str_noinline(significand + integral_size, - significand + significand_size, out); + return detail::copy_noinline(significand + integral_size, + significand + significand_size, out); } template @@ -2425,18 +2535,18 @@ FMT_CONSTEXPR20 auto write_significand(OutputIt out, T significand, decimal_point); } auto buffer = basic_memory_buffer(); - write_significand(buffer_appender(buffer), significand, - significand_size, integral_size, decimal_point); + write_significand(basic_appender(buffer), significand, significand_size, + integral_size, decimal_point); grouping.apply( out, basic_string_view(buffer.data(), to_unsigned(integral_size))); - return detail::copy_str_noinline(buffer.data() + integral_size, - buffer.end(), out); + return detail::copy_noinline(buffer.data() + integral_size, + buffer.end(), out); } -template > FMT_CONSTEXPR20 auto 
do_write_float(OutputIt out, const DecimalFP& f, - const basic_format_specs& specs, + const format_specs& specs, float_specs fspecs, locale_ref loc) -> OutputIt { auto significand = f.significand; @@ -2473,7 +2583,7 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, if (abs_output_exp >= 100) exp_digits = abs_output_exp >= 1000 ? 4 : 3; size += to_unsigned((decimal_point ? 1 : 0) + 2 + exp_digits); - char exp_char = fspecs.upper ? 'E' : 'e'; + char exp_char = specs.upper ? 'E' : 'e'; auto write = [=](iterator it) { if (sign) *it++ = detail::sign(sign); // Insert a decimal point after the first digit and add an exponent. @@ -2483,8 +2593,9 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, *it++ = static_cast(exp_char); return write_exponent(output_exp, it); }; - return specs.width > 0 ? write_padded(out, specs, size, write) - : base_iterator(out, write(reserve(out, size))); + return specs.width > 0 + ? write_padded(out, specs, size, write) + : base_iterator(out, write(reserve(out, size))); } int exp = f.exponent + significand_size; @@ -2495,12 +2606,12 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, abort_fuzzing_if(num_zeros > 5000); if (fspecs.showpoint) { ++size; - if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 1; + if (num_zeros <= 0 && fspecs.format != float_format::fixed) num_zeros = 0; if (num_zeros > 0) size += to_unsigned(num_zeros); } auto grouping = Grouping(loc, fspecs.locale); size += to_unsigned(grouping.count_separators(exp)); - return write_padded(out, specs, size, [&](iterator it) { + return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = detail::sign(sign); it = write_significand(it, significand, significand_size, f.exponent, grouping); @@ -2513,8 +2624,8 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, int num_zeros = fspecs.showpoint ? 
fspecs.precision - significand_size : 0; size += 1 + to_unsigned(num_zeros > 0 ? num_zeros : 0); auto grouping = Grouping(loc, fspecs.locale); - size += to_unsigned(grouping.count_separators(significand_size)); - return write_padded(out, specs, size, [&](iterator it) { + size += to_unsigned(grouping.count_separators(exp)); + return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = detail::sign(sign); it = write_significand(it, significand, significand_size, exp, decimal_point, grouping); @@ -2529,7 +2640,7 @@ FMT_CONSTEXPR20 auto do_write_float(OutputIt out, const DecimalFP& f, } bool pointy = num_zeros != 0 || significand_size != 0 || fspecs.showpoint; size += 1 + (pointy ? 1 : 0) + to_unsigned(num_zeros); - return write_padded(out, specs, size, [&](iterator it) { + return write_padded(out, specs, size, [&](iterator it) { if (sign) *it++ = detail::sign(sign); *it++ = zero; if (!pointy) return it; @@ -2543,32 +2654,31 @@ template class fallback_digit_grouping { public: constexpr fallback_digit_grouping(locale_ref, bool) {} - constexpr bool has_separator() const { return false; } + constexpr auto has_separator() const -> bool { return false; } - constexpr int count_separators(int) const { return 0; } + constexpr auto count_separators(int) const -> int { return 0; } template - constexpr Out apply(Out out, basic_string_view) const { + constexpr auto apply(Out out, basic_string_view) const -> Out { return out; } }; -template +template FMT_CONSTEXPR20 auto write_float(OutputIt out, const DecimalFP& f, - const basic_format_specs& specs, - float_specs fspecs, locale_ref loc) - -> OutputIt { + const format_specs& specs, float_specs fspecs, + locale_ref loc) -> OutputIt { if (is_constant_evaluated()) { - return do_write_float>(out, f, specs, fspecs, loc); } else { - return do_write_float(out, f, specs, fspecs, loc); + return do_write_float(out, f, specs, fspecs, loc); } } -template constexpr bool isnan(T value) { - return !(value >= value); // std::isnan 
doesn't support __float128. +template constexpr auto isnan(T value) -> bool { + return value != value; // std::isnan doesn't support __float128. } template @@ -2580,14 +2690,14 @@ struct has_isfinite> template ::value&& has_isfinite::value)> -FMT_CONSTEXPR20 bool isfinite(T value) { +FMT_CONSTEXPR20 auto isfinite(T value) -> bool { constexpr T inf = T(std::numeric_limits::infinity()); if (is_constant_evaluated()) return !detail::isnan(value) && value < inf && value > -inf; return std::isfinite(value); } template ::value)> -FMT_CONSTEXPR bool isfinite(T value) { +FMT_CONSTEXPR auto isfinite(T value) -> bool { T inf = T(std::numeric_limits::infinity()); // std::isfinite doesn't support __float128. return !detail::isnan(value) && value < inf && value > -inf; @@ -2606,78 +2716,6 @@ FMT_INLINE FMT_CONSTEXPR bool signbit(T value) { return std::signbit(static_cast(value)); } -enum class round_direction { unknown, up, down }; - -// Given the divisor (normally a power of 10), the remainder = v % divisor for -// some number v and the error, returns whether v should be rounded up, down, or -// whether the rounding direction can't be determined due to error. -// error should be less than divisor / 2. -FMT_CONSTEXPR inline round_direction get_round_direction(uint64_t divisor, - uint64_t remainder, - uint64_t error) { - FMT_ASSERT(remainder < divisor, ""); // divisor - remainder won't overflow. - FMT_ASSERT(error < divisor, ""); // divisor - error won't overflow. - FMT_ASSERT(error < divisor - error, ""); // error * 2 won't overflow. - // Round down if (remainder + error) * 2 <= divisor. - if (remainder <= divisor - remainder && error * 2 <= divisor - remainder * 2) - return round_direction::down; - // Round up if (remainder - error) * 2 >= divisor. - if (remainder >= error && - remainder - error >= divisor - (remainder - error)) { - return round_direction::up; - } - return round_direction::unknown; -} - -namespace digits { -enum result { - more, // Generate more digits. 
- done, // Done generating digits. - error // Digit generation cancelled due to an error. -}; -} - -struct gen_digits_handler { - char* buf; - int size; - int precision; - int exp10; - bool fixed; - - FMT_CONSTEXPR digits::result on_digit(char digit, uint64_t divisor, - uint64_t remainder, uint64_t error, - bool integral) { - FMT_ASSERT(remainder < divisor, ""); - buf[size++] = digit; - if (!integral && error >= remainder) return digits::error; - if (size < precision) return digits::more; - if (!integral) { - // Check if error * 2 < divisor with overflow prevention. - // The check is not needed for the integral part because error = 1 - // and divisor > (1 << 32) there. - if (error >= divisor || error >= divisor - error) return digits::error; - } else { - FMT_ASSERT(error == 1 && divisor > 2, ""); - } - auto dir = get_round_direction(divisor, remainder, error); - if (dir != round_direction::up) - return dir == round_direction::down ? digits::done : digits::error; - ++buf[size - 1]; - for (int i = size - 1; i > 0 && buf[i] > '9'; --i) { - buf[i] = '0'; - ++buf[i - 1]; - } - if (buf[0] > '9') { - buf[0] = '1'; - if (fixed) - buf[size++] = '0'; - else - ++exp10; - } - return digits::done; - } -}; - inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) { // Adjust fixed precision by exponent because it is relative to decimal // point. @@ -2686,101 +2724,6 @@ inline FMT_CONSTEXPR20 void adjust_precision(int& precision, int exp10) { precision += exp10; } -// Generates output using the Grisu digit-gen algorithm. -// error: the size of the region (lower, upper) outside of which numbers -// definitely do not round to value (Delta in Grisu3). -FMT_INLINE FMT_CONSTEXPR20 auto grisu_gen_digits(fp value, uint64_t error, - int& exp, - gen_digits_handler& handler) - -> digits::result { - const fp one(1ULL << -value.e, value.e); - // The integral part of scaled value (p1 in Grisu) = value / one. 
It cannot be - // zero because it contains a product of two 64-bit numbers with MSB set (due - // to normalization) - 1, shifted right by at most 60 bits. - auto integral = static_cast(value.f >> -one.e); - FMT_ASSERT(integral != 0, ""); - FMT_ASSERT(integral == value.f >> -one.e, ""); - // The fractional part of scaled value (p2 in Grisu) c = value % one. - uint64_t fractional = value.f & (one.f - 1); - exp = count_digits(integral); // kappa in Grisu. - // Non-fixed formats require at least one digit and no precision adjustment. - if (handler.fixed) { - adjust_precision(handler.precision, exp + handler.exp10); - // Check if precision is satisfied just by leading zeros, e.g. - // format("{:.2f}", 0.001) gives "0.00" without generating any digits. - if (handler.precision <= 0) { - if (handler.precision < 0) return digits::done; - // Divide by 10 to prevent overflow. - uint64_t divisor = data::power_of_10_64[exp - 1] << -one.e; - auto dir = get_round_direction(divisor, value.f / 10, error * 10); - if (dir == round_direction::unknown) return digits::error; - handler.buf[handler.size++] = dir == round_direction::up ? '1' : '0'; - return digits::done; - } - } - // Generate digits for the integral part. This can produce up to 10 digits. - do { - uint32_t digit = 0; - auto divmod_integral = [&](uint32_t divisor) { - digit = integral / divisor; - integral %= divisor; - }; - // This optimization by Milo Yip reduces the number of integer divisions by - // one per iteration. 
- switch (exp) { - case 10: - divmod_integral(1000000000); - break; - case 9: - divmod_integral(100000000); - break; - case 8: - divmod_integral(10000000); - break; - case 7: - divmod_integral(1000000); - break; - case 6: - divmod_integral(100000); - break; - case 5: - divmod_integral(10000); - break; - case 4: - divmod_integral(1000); - break; - case 3: - divmod_integral(100); - break; - case 2: - divmod_integral(10); - break; - case 1: - digit = integral; - integral = 0; - break; - default: - FMT_ASSERT(false, "invalid number of digits"); - } - --exp; - auto remainder = (static_cast(integral) << -one.e) + fractional; - auto result = handler.on_digit(static_cast('0' + digit), - data::power_of_10_64[exp] << -one.e, - remainder, error, true); - if (result != digits::more) return result; - } while (exp > 0); - // Generate digits for the fractional part. - for (;;) { - fractional *= 10; - error *= 10; - char digit = static_cast('0' + (fractional >> -one.e)); - fractional &= one.f - 1; - --exp; - auto result = handler.on_digit(digit, one.f, fractional, error, false); - if (result != digits::more) return result; - } -} - class bigint { private: // A bigint is stored as an array of bigits (big digits), with bigit at index @@ -2791,10 +2734,10 @@ class bigint { basic_memory_buffer bigits_; int exp_; - FMT_CONSTEXPR20 bigit operator[](int index) const { + FMT_CONSTEXPR20 auto operator[](int index) const -> bigit { return bigits_[to_unsigned(index)]; } - FMT_CONSTEXPR20 bigit& operator[](int index) { + FMT_CONSTEXPR20 auto operator[](int index) -> bigit& { return bigits_[to_unsigned(index)]; } @@ -2881,7 +2824,7 @@ class bigint { auto size = other.bigits_.size(); bigits_.resize(size); auto data = other.bigits_.data(); - std::copy(data, data + size, make_checked(bigits_.data(), size)); + copy(data, data + size, bigits_.data()); exp_ = other.exp_; } @@ -2890,11 +2833,11 @@ class bigint { assign(uint64_or_128_t(n)); } - FMT_CONSTEXPR20 int num_bigits() const { + 
FMT_CONSTEXPR20 auto num_bigits() const -> int { return static_cast(bigits_.size()) + exp_; } - FMT_NOINLINE FMT_CONSTEXPR20 bigint& operator<<=(int shift) { + FMT_NOINLINE FMT_CONSTEXPR20 auto operator<<=(int shift) -> bigint& { FMT_ASSERT(shift >= 0, ""); exp_ += shift / bigit_bits; shift %= bigit_bits; @@ -2909,13 +2852,15 @@ class bigint { return *this; } - template FMT_CONSTEXPR20 bigint& operator*=(Int value) { + template + FMT_CONSTEXPR20 auto operator*=(Int value) -> bigint& { FMT_ASSERT(value > 0, ""); multiply(uint32_or_64_or_128_t(value)); return *this; } - friend FMT_CONSTEXPR20 int compare(const bigint& lhs, const bigint& rhs) { + friend FMT_CONSTEXPR20 auto compare(const bigint& lhs, const bigint& rhs) + -> int { int num_lhs_bigits = lhs.num_bigits(), num_rhs_bigits = rhs.num_bigits(); if (num_lhs_bigits != num_rhs_bigits) return num_lhs_bigits > num_rhs_bigits ? 1 : -1; @@ -2932,8 +2877,9 @@ class bigint { } // Returns compare(lhs1 + lhs2, rhs). - friend FMT_CONSTEXPR20 int add_compare(const bigint& lhs1, const bigint& lhs2, - const bigint& rhs) { + friend FMT_CONSTEXPR20 auto add_compare(const bigint& lhs1, + const bigint& lhs2, const bigint& rhs) + -> int { auto minimum = [](int a, int b) { return a < b ? a : b; }; auto maximum = [](int a, int b) { return a > b ? a : b; }; int max_lhs_bigits = maximum(lhs1.num_bigits(), lhs2.num_bigits()); @@ -3014,13 +2960,13 @@ class bigint { bigits_.resize(to_unsigned(num_bigits + exp_difference)); for (int i = num_bigits - 1, j = i + exp_difference; i >= 0; --i, --j) bigits_[j] = bigits_[i]; - std::uninitialized_fill_n(bigits_.data(), exp_difference, 0); + memset(bigits_.data(), 0, to_unsigned(exp_difference) * sizeof(bigit)); exp_ -= exp_difference; } // Divides this bignum by divisor, assigning the remainder to this and // returning the quotient. 
- FMT_CONSTEXPR20 int divmod_assign(const bigint& divisor) { + FMT_CONSTEXPR20 auto divmod_assign(const bigint& divisor) -> int { FMT_ASSERT(this != &divisor, ""); if (compare(*this, divisor) < 0) return 0; FMT_ASSERT(divisor.bigits_[divisor.bigits_.size() - 1u] != 0, ""); @@ -3095,6 +3041,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, } int even = static_cast((value.f & 1) == 0); if (!upper) upper = &lower; + bool shortest = num_digits < 0; if ((flags & dragon::fixup) != 0) { if (add_compare(numerator, *upper, denominator) + even <= 0) { --exp10; @@ -3107,7 +3054,7 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, if ((flags & dragon::fixed) != 0) adjust_precision(num_digits, exp10 + 1); } // Invariant: value == (numerator / denominator) * pow(10, exp10). - if (num_digits < 0) { + if (shortest) { // Generate the shortest representation. num_digits = 0; char* data = buf.data(); @@ -3137,9 +3084,12 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, } // Generate the given number of digits. exp10 -= num_digits - 1; - if (num_digits == 0) { - denominator *= 10; - auto digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0'; + if (num_digits <= 0) { + auto digit = '0'; + if (num_digits == 0) { + denominator *= 10; + digit = add_compare(numerator, numerator, denominator) > 0 ? '1' : '0'; + } buf.push_back(digit); return; } @@ -3162,7 +3112,10 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, } if (buf[0] == overflow) { buf[0] = '1'; - ++exp10; + if ((flags & dragon::fixed) != 0) + buf.push_back('0'); + else + ++exp10; } return; } @@ -3171,6 +3124,105 @@ FMT_CONSTEXPR20 inline void format_dragon(basic_fp value, buf[num_digits - 1] = static_cast('0' + digit); } +// Formats a floating-point number using the hexfloat format. 
+template ::value)> +FMT_CONSTEXPR20 void format_hexfloat(Float value, format_specs specs, + buffer& buf) { + // float is passed as double to reduce the number of instantiations and to + // simplify implementation. + static_assert(!std::is_same::value, ""); + + using info = dragonbox::float_info; + + // Assume Float is in the format [sign][exponent][significand]. + using carrier_uint = typename info::carrier_uint; + + constexpr auto num_float_significand_bits = + detail::num_significand_bits(); + + basic_fp f(value); + f.e += num_float_significand_bits; + if (!has_implicit_bit()) --f.e; + + constexpr auto num_fraction_bits = + num_float_significand_bits + (has_implicit_bit() ? 1 : 0); + constexpr auto num_xdigits = (num_fraction_bits + 3) / 4; + + constexpr auto leading_shift = ((num_xdigits - 1) * 4); + const auto leading_mask = carrier_uint(0xF) << leading_shift; + const auto leading_xdigit = + static_cast((f.f & leading_mask) >> leading_shift); + if (leading_xdigit > 1) f.e -= (32 - countl_zero(leading_xdigit) - 1); + + int print_xdigits = num_xdigits - 1; + if (specs.precision >= 0 && print_xdigits > specs.precision) { + const int shift = ((print_xdigits - specs.precision - 1) * 4); + const auto mask = carrier_uint(0xF) << shift; + const auto v = static_cast((f.f & mask) >> shift); + + if (v >= 8) { + const auto inc = carrier_uint(1) << (shift + 4); + f.f += inc; + f.f &= ~(inc - 1); + } + + // Check long double overflow + if (!has_implicit_bit()) { + const auto implicit_bit = carrier_uint(1) << num_float_significand_bits; + if ((f.f & implicit_bit) == implicit_bit) { + f.f >>= 4; + f.e += 4; + } + } + + print_xdigits = specs.precision; + } + + char xdigits[num_bits() / 4]; + detail::fill_n(xdigits, sizeof(xdigits), '0'); + format_uint<4>(xdigits, f.f, num_xdigits, specs.upper); + + // Remove zero tail + while (print_xdigits > 0 && xdigits[print_xdigits] == '0') --print_xdigits; + + buf.push_back('0'); + buf.push_back(specs.upper ? 
'X' : 'x'); + buf.push_back(xdigits[0]); + if (specs.alt || print_xdigits > 0 || print_xdigits < specs.precision) + buf.push_back('.'); + buf.append(xdigits + 1, xdigits + 1 + print_xdigits); + for (; print_xdigits < specs.precision; ++print_xdigits) buf.push_back('0'); + + buf.push_back(specs.upper ? 'P' : 'p'); + + uint32_t abs_e; + if (f.e < 0) { + buf.push_back('-'); + abs_e = static_cast(-f.e); + } else { + buf.push_back('+'); + abs_e = static_cast(f.e); + } + format_decimal(appender(buf), abs_e, detail::count_digits(abs_e)); +} + +template ::value)> +FMT_CONSTEXPR20 void format_hexfloat(Float value, format_specs specs, + buffer& buf) { + format_hexfloat(static_cast(value), specs, buf); +} + +constexpr auto fractional_part_rounding_thresholds(int index) -> uint32_t { + // For checking rounding thresholds. + // The kth entry is chosen to be the smallest integer such that the + // upper 32-bits of 10^(k+1) times it is strictly bigger than 5 * 10^k. + // It is equal to ceil(2^31 + 2^32/10^(k + 1)). + // These are stored in a string literal because we cannot have static arrays + // in constexpr functions and non-static ones are poorly optimized. + return U"\x9999999a\x828f5c29\x80418938\x80068db9\x8000a7c6\x800010c7" + U"\x800001ae\x8000002b"[index]; +} + template FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, buffer& buf) -> int { @@ -3193,7 +3245,7 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, int exp = 0; bool use_dragon = true; unsigned dragon_flags = 0; - if (!is_fast_float()) { + if (!is_fast_float() || is_constant_evaluated()) { const auto inv_log2_10 = 0.3010299956639812; // 1 / log2(10) using info = dragonbox::float_info; const auto f = basic_fp(converted_value); @@ -3201,37 +3253,259 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, // 10^(exp - 1) <= value < 10^exp or 10^exp <= value < 10^(exp + 1). 
// This is based on log10(value) == log2(value) / log2(10) and approximation // of log2(value) by e + num_fraction_bits idea from double-conversion. - exp = static_cast( - std::ceil((f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10)); + auto e = (f.e + count_digits<1>(f.f) - 1) * inv_log2_10 - 1e-10; + exp = static_cast(e); + if (e > exp) ++exp; // Compute ceil. dragon_flags = dragon::fixup; - } else if (!is_constant_evaluated() && precision < 0) { + } else if (precision < 0) { // Use Dragonbox for the shortest format. if (specs.binary32) { auto dec = dragonbox::to_decimal(static_cast(value)); - write(buffer_appender(buf), dec.significand); + write(appender(buf), dec.significand); return dec.exponent; } auto dec = dragonbox::to_decimal(static_cast(value)); - write(buffer_appender(buf), dec.significand); + write(appender(buf), dec.significand); return dec.exponent; } else { - // Use Grisu + Dragon4 for the given precision: - // https://www.cs.tufts.edu/~nr/cs257/archive/florian-loitsch/printf.pdf. - const int min_exp = -60; // alpha in Grisu. - int cached_exp10 = 0; // K in Grisu. - fp normalized = normalize(fp(converted_value)); - const auto cached_pow = get_cached_power( - min_exp - (normalized.e + fp::num_significand_bits), cached_exp10); - normalized = normalized * cached_pow; - gen_digits_handler handler{buf.data(), 0, precision, -cached_exp10, fixed}; - if (grisu_gen_digits(normalized, 1, exp, handler) != digits::error && - !is_constant_evaluated()) { - exp += handler.exp10; - buf.try_resize(to_unsigned(handler.size)); - use_dragon = false; + // Extract significand bits and exponent bits. + using info = dragonbox::float_info; + auto br = bit_cast(static_cast(value)); + + const uint64_t significand_mask = + (static_cast(1) << num_significand_bits()) - 1; + uint64_t significand = (br & significand_mask); + int exponent = static_cast((br & exponent_mask()) >> + num_significand_bits()); + + if (exponent != 0) { // Check if normal. 
+ exponent -= exponent_bias() + num_significand_bits(); + significand |= + (static_cast(1) << num_significand_bits()); + significand <<= 1; } else { - exp += handler.size - cached_exp10 - 1; - precision = handler.precision; + // Normalize subnormal inputs. + FMT_ASSERT(significand != 0, "zeros should not appear here"); + int shift = countl_zero(significand); + FMT_ASSERT(shift >= num_bits() - num_significand_bits(), + ""); + shift -= (num_bits() - num_significand_bits() - 2); + exponent = (std::numeric_limits::min_exponent - + num_significand_bits()) - + shift; + significand <<= shift; + } + + // Compute the first several nonzero decimal significand digits. + // We call the number we get the first segment. + const int k = info::kappa - dragonbox::floor_log10_pow2(exponent); + exp = -k; + const int beta = exponent + dragonbox::floor_log2_pow10(k); + uint64_t first_segment; + bool has_more_segments; + int digits_in_the_first_segment; + { + const auto r = dragonbox::umul192_upper128( + significand << beta, dragonbox::get_cached_power(k)); + first_segment = r.high(); + has_more_segments = r.low() != 0; + + // The first segment can have 18 ~ 19 digits. + if (first_segment >= 1000000000000000000ULL) { + digits_in_the_first_segment = 19; + } else { + // When it is of 18-digits, we align it to 19-digits by adding a bogus + // zero at the end. + digits_in_the_first_segment = 18; + first_segment *= 10; + } + } + + // Compute the actual number of decimal digits to print. + if (fixed) adjust_precision(precision, exp + digits_in_the_first_segment); + + // Use Dragon4 only when there might be not enough digits in the first + // segment. + if (digits_in_the_first_segment > precision) { + use_dragon = false; + + if (precision <= 0) { + exp += digits_in_the_first_segment; + + if (precision < 0) { + // Nothing to do, since all we have are just leading zeros. + buf.try_resize(0); + } else { + // We may need to round-up. 
+ buf.try_resize(1); + if ((first_segment | static_cast(has_more_segments)) > + 5000000000000000000ULL) { + buf[0] = '1'; + } else { + buf[0] = '0'; + } + } + } // precision <= 0 + else { + exp += digits_in_the_first_segment - precision; + + // When precision > 0, we divide the first segment into three + // subsegments, each with 9, 9, and 0 ~ 1 digits so that each fits + // in 32-bits which usually allows faster calculation than in + // 64-bits. Since some compiler (e.g. MSVC) doesn't know how to optimize + // division-by-constant for large 64-bit divisors, we do it here + // manually. The magic number 7922816251426433760 below is equal to + // ceil(2^(64+32) / 10^10). + const uint32_t first_subsegment = static_cast( + dragonbox::umul128_upper64(first_segment, 7922816251426433760ULL) >> + 32); + const uint64_t second_third_subsegments = + first_segment - first_subsegment * 10000000000ULL; + + uint64_t prod; + uint32_t digits; + bool should_round_up; + int number_of_digits_to_print = precision > 9 ? 9 : precision; + + // Print a 9-digits subsegment, either the first or the second. + auto print_subsegment = [&](uint32_t subsegment, char* buffer) { + int number_of_digits_printed = 0; + + // If we want to print an odd number of digits from the subsegment, + if ((number_of_digits_to_print & 1) != 0) { + // Convert to 64-bit fixed-point fractional form with 1-digit + // integer part. The magic number 720575941 is a good enough + // approximation of 2^(32 + 24) / 10^8; see + // https://jk-jeon.github.io/posts/2022/12/fixed-precision-formatting/#fixed-length-case + // for details. + prod = ((subsegment * static_cast(720575941)) >> 24) + 1; + digits = static_cast(prod >> 32); + *buffer = static_cast('0' + digits); + number_of_digits_printed++; + } + // If we want to print an even number of digits from the + // first_subsegment, + else { + // Convert to 64-bit fixed-point fractional form with 2-digits + // integer part. 
The magic number 450359963 is a good enough + // approximation of 2^(32 + 20) / 10^7; see + // https://jk-jeon.github.io/posts/2022/12/fixed-precision-formatting/#fixed-length-case + // for details. + prod = ((subsegment * static_cast(450359963)) >> 20) + 1; + digits = static_cast(prod >> 32); + copy2(buffer, digits2(digits)); + number_of_digits_printed += 2; + } + + // Print all digit pairs. + while (number_of_digits_printed < number_of_digits_to_print) { + prod = static_cast(prod) * static_cast(100); + digits = static_cast(prod >> 32); + copy2(buffer + number_of_digits_printed, digits2(digits)); + number_of_digits_printed += 2; + } + }; + + // Print first subsegment. + print_subsegment(first_subsegment, buf.data()); + + // Perform rounding if the first subsegment is the last subsegment to + // print. + if (precision <= 9) { + // Rounding inside the subsegment. + // We round-up if: + // - either the fractional part is strictly larger than 1/2, or + // - the fractional part is exactly 1/2 and the last digit is odd. + // We rely on the following observations: + // - If fractional_part >= threshold, then the fractional part is + // strictly larger than 1/2. + // - If the MSB of fractional_part is set, then the fractional part + // must be at least 1/2. + // - When the MSB of fractional_part is set, either + // second_third_subsegments being nonzero or has_more_segments + // being true means there are further digits not printed, so the + // fractional part is strictly larger than 1/2. + if (precision < 9) { + uint32_t fractional_part = static_cast(prod); + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (second_third_subsegments != 0) | + has_more_segments)) != 0; + } + // Rounding at the subsegment boundary. 
+ // In this case, the fractional part is at least 1/2 if and only if + // second_third_subsegments >= 5000000000ULL, and is strictly larger + // than 1/2 if we further have either second_third_subsegments > + // 5000000000ULL or has_more_segments == true. + else { + should_round_up = second_third_subsegments > 5000000000ULL || + (second_third_subsegments == 5000000000ULL && + ((digits & 1) != 0 || has_more_segments)); + } + } + // Otherwise, print the second subsegment. + else { + // Compilers are not aware of how to leverage the maximum value of + // second_third_subsegments to find out a better magic number which + // allows us to eliminate an additional shift. 1844674407370955162 = + // ceil(2^64/10) < ceil(2^64*(10^9/(10^10 - 1))). + const uint32_t second_subsegment = + static_cast(dragonbox::umul128_upper64( + second_third_subsegments, 1844674407370955162ULL)); + const uint32_t third_subsegment = + static_cast(second_third_subsegments) - + second_subsegment * 10; + + number_of_digits_to_print = precision - 9; + print_subsegment(second_subsegment, buf.data() + 9); + + // Rounding inside the subsegment. + if (precision < 18) { + // The condition third_subsegment != 0 implies that the segment was + // of 19 digits, so in this case the third segment should be + // consisting of a genuine digit from the input. + uint32_t fractional_part = static_cast(prod); + should_round_up = + fractional_part >= fractional_part_rounding_thresholds( + 8 - number_of_digits_to_print) || + ((fractional_part >> 31) & + ((digits & 1) | (third_subsegment != 0) | + has_more_segments)) != 0; + } + // Rounding at the subsegment boundary. + else { + // In this case, the segment must be of 19 digits, thus + // the third subsegment should be consisting of a genuine digit from + // the input. + should_round_up = third_subsegment > 5 || + (third_subsegment == 5 && + ((digits & 1) != 0 || has_more_segments)); + } + } + + // Round-up if necessary. 
+ if (should_round_up) { + ++buf[precision - 1]; + for (int i = precision - 1; i > 0 && buf[i] > '9'; --i) { + buf[i] = '0'; + ++buf[i - 1]; + } + if (buf[0] > '9') { + buf[0] = '1'; + if (fixed) + buf[precision++] = '0'; + else + ++exp; + } + } + buf.try_resize(to_unsigned(precision)); + } + } // if (digits_in_the_first_segment > precision) + else { + // Adjust the exponent for its use in Dragon4. + exp += digits_in_the_first_segment - 1; } } if (use_dragon) { @@ -3258,100 +3532,102 @@ FMT_CONSTEXPR20 auto format_float(Float value, int precision, float_specs specs, } return exp; } + template -FMT_CONSTEXPR20 auto write_float(OutputIt out, T value, - basic_format_specs specs, locale_ref loc) - -> OutputIt { - float_specs fspecs = parse_float_type_spec(specs); - fspecs.sign = specs.sign; +FMT_CONSTEXPR20 auto write_float(OutputIt out, T value, format_specs specs, + locale_ref loc) -> OutputIt { + sign_t sign = specs.sign; if (detail::signbit(value)) { // value < 0 is false for NaN so use signbit. 
- fspecs.sign = sign::minus; + sign = sign::minus; value = -value; - } else if (fspecs.sign == sign::minus) { - fspecs.sign = sign::none; + } else if (sign == sign::minus) { + sign = sign::none; } if (!detail::isfinite(value)) - return write_nonfinite(out, detail::isnan(value), specs, fspecs); + return write_nonfinite(out, detail::isnan(value), specs, sign); - if (specs.align == align::numeric && fspecs.sign) { + if (specs.align == align::numeric && sign) { auto it = reserve(out, 1); - *it++ = detail::sign(fspecs.sign); + *it++ = detail::sign(sign); out = base_iterator(out, it); - fspecs.sign = sign::none; + sign = sign::none; if (specs.width != 0) --specs.width; } memory_buffer buffer; - if (fspecs.format == float_format::hex) { - if (fspecs.sign) buffer.push_back(detail::sign(fspecs.sign)); - snprintf_float(convert_float(value), specs.precision, fspecs, buffer); - return write_bytes(out, {buffer.data(), buffer.size()}, - specs); + if (specs.type == presentation_type::hexfloat) { + if (sign) buffer.push_back(detail::sign(sign)); + format_hexfloat(convert_float(value), specs, buffer); + return write_bytes(out, {buffer.data(), buffer.size()}, + specs); } + int precision = specs.precision >= 0 || specs.type == presentation_type::none ? 
specs.precision : 6; - if (fspecs.format == float_format::exp) { + if (specs.type == presentation_type::exp) { if (precision == max_value()) - throw_format_error("number is too big"); + report_error("number is too big"); else ++precision; - } else if (fspecs.format != float_format::fixed && precision == 0) { + } else if (specs.type != presentation_type::fixed && precision == 0) { precision = 1; } + float_specs fspecs = parse_float_type_spec(specs); + fspecs.sign = sign; if (const_check(std::is_same())) fspecs.binary32 = true; int exp = format_float(convert_float(value), precision, fspecs, buffer); fspecs.precision = precision; auto f = big_decimal_fp{buffer.data(), static_cast(buffer.size()), exp}; - return write_float(out, f, specs, fspecs, loc); + return write_float(out, f, specs, fspecs, loc); } template ::value)> -FMT_CONSTEXPR20 auto write(OutputIt out, T value, - basic_format_specs specs, locale_ref loc = {}) - -> OutputIt { +FMT_CONSTEXPR20 auto write(OutputIt out, T value, format_specs specs, + locale_ref loc = {}) -> OutputIt { if (const_check(!is_supported_floating_point(value))) return out; return specs.localized && write_loc(out, value, specs, loc) ? 
out - : write_float(out, value, specs, loc); + : write_float(out, value, specs, loc); } template ::value)> FMT_CONSTEXPR20 auto write(OutputIt out, T value) -> OutputIt { - if (is_constant_evaluated()) - return write(out, value, basic_format_specs()); + if (is_constant_evaluated()) return write(out, value, format_specs()); if (const_check(!is_supported_floating_point(value))) return out; - auto fspecs = float_specs(); + auto sign = sign_t::none; if (detail::signbit(value)) { - fspecs.sign = sign::minus; + sign = sign::minus; value = -value; } - constexpr auto specs = basic_format_specs(); + constexpr auto specs = format_specs(); using floaty = conditional_t::value, double, T>; - using uint = typename dragonbox::float_info::carrier_uint; - uint mask = exponent_mask(); - if ((bit_cast(value) & mask) == mask) - return write_nonfinite(out, std::isnan(value), specs, fspecs); + using floaty_uint = typename dragonbox::float_info::carrier_uint; + floaty_uint mask = exponent_mask(); + if ((bit_cast(value) & mask) == mask) + return write_nonfinite(out, std::isnan(value), specs, sign); + auto fspecs = float_specs(); + fspecs.sign = sign; auto dec = dragonbox::to_decimal(static_cast(value)); - return write_float(out, dec, specs, fspecs, {}); + return write_float(out, dec, specs, fspecs, {}); } template ::value && !is_fast_float::value)> inline auto write(OutputIt out, T value) -> OutputIt { - return write(out, value, basic_format_specs()); + return write(out, value, format_specs()); } template -auto write(OutputIt out, monostate, basic_format_specs = {}, - locale_ref = {}) -> OutputIt { +auto write(OutputIt out, monostate, format_specs = {}, locale_ref = {}) + -> OutputIt { FMT_ASSERT(false, ""); return out; } @@ -3359,13 +3635,11 @@ auto write(OutputIt out, monostate, basic_format_specs = {}, template FMT_CONSTEXPR auto write(OutputIt out, basic_string_view value) -> OutputIt { - auto it = reserve(out, value.size()); - it = copy_str_noinline(value.begin(), value.end(), it); - 
return base_iterator(out, it); + return copy_noinline(value.begin(), value.end(), out); } template ::value)> + FMT_ENABLE_IF(has_to_string_view::value)> constexpr auto write(OutputIt out, const T& value) -> OutputIt { return write(out, to_string_view(value)); } @@ -3384,13 +3658,12 @@ FMT_CONSTEXPR auto write(OutputIt out, T value) -> OutputIt { template ::value)> -FMT_CONSTEXPR auto write(OutputIt out, T value, - const basic_format_specs& specs = {}, +FMT_CONSTEXPR auto write(OutputIt out, T value, const format_specs& specs = {}, locale_ref = {}) -> OutputIt { return specs.type != presentation_type::none && specs.type != presentation_type::string - ? write(out, value ? 1 : 0, specs, {}) - : write_bytes(out, value ? "true" : "false", specs); + ? write(out, value ? 1 : 0, specs, {}) + : write_bytes(out, value ? "true" : "false", specs); } template @@ -3401,22 +3674,16 @@ FMT_CONSTEXPR auto write(OutputIt out, Char value) -> OutputIt { } template -FMT_CONSTEXPR_CHAR_TRAITS auto write(OutputIt out, const Char* value) - -> OutputIt { - if (!value) { - throw_format_error("string pointer is null"); - } else { - out = write(out, basic_string_view(value)); - } +FMT_CONSTEXPR20 auto write(OutputIt out, const Char* value) -> OutputIt { + if (value) return write(out, basic_string_view(value)); + report_error("string pointer is null"); return out; } template ::value)> -auto write(OutputIt out, const T* value, - const basic_format_specs& specs = {}, locale_ref = {}) - -> OutputIt { - check_pointer_type_spec(specs.type, error_handler()); +auto write(OutputIt out, const T* value, const format_specs& specs = {}, + locale_ref = {}) -> OutputIt { return write_ptr(out, bit_cast(value), &specs); } @@ -3424,7 +3691,7 @@ auto write(OutputIt out, const T* value, template > FMT_CONSTEXPR auto write(OutputIt out, const T& value) -> enable_if_t< - std::is_class::value && !is_string::value && + std::is_class::value && !has_to_string_view::value && !is_floating_point::value && 
!std::is_same::value && !std::is_same().map( value))>>::value, @@ -3435,21 +3702,22 @@ FMT_CONSTEXPR auto write(OutputIt out, const T& value) -> enable_if_t< template > FMT_CONSTEXPR auto write(OutputIt out, const T& value) - -> enable_if_t::value == type::custom_type, + -> enable_if_t::value == + type::custom_type && + !std::is_fundamental::value, OutputIt> { - using formatter_type = - conditional_t::value, - typename Context::template formatter_type, - fallback_formatter>; + auto formatter = typename Context::template formatter_type(); + auto parse_ctx = typename Context::parse_context_type({}); + formatter.parse(parse_ctx); auto ctx = Context(out, {}, {}); - return formatter_type().format(value, ctx); + return formatter.format(value, ctx); } // An argument visitor that formats the argument and writes it via the output // iterator. It's a class and not a generic lambda for compatibility with C++11. template struct default_arg_formatter { - using iterator = buffer_appender; - using context = buffer_context; + using iterator = basic_appender; + using context = buffered_context; iterator out; basic_format_args args; @@ -3467,16 +3735,16 @@ template struct default_arg_formatter { }; template struct arg_formatter { - using iterator = buffer_appender; - using context = buffer_context; + using iterator = basic_appender; + using context = buffered_context; iterator out; - const basic_format_specs& specs; + const format_specs& specs; locale_ref locale; template FMT_CONSTEXPR FMT_INLINE auto operator()(T value) -> iterator { - return detail::write(out, value, specs, locale); + return detail::write(out, value, specs, locale); } auto operator()(typename basic_format_arg::handle) -> iterator { // User-defined types are handled separately because they require access @@ -3485,116 +3753,49 @@ template struct arg_formatter { } }; -template struct custom_formatter { - basic_format_parse_context& parse_ctx; - buffer_context& ctx; - - void operator()( - typename 
basic_format_arg>::handle h) const { - h.format(parse_ctx, ctx); - } - template void operator()(T) const {} -}; - -template class width_checker { - public: - explicit FMT_CONSTEXPR width_checker(ErrorHandler& eh) : handler_(eh) {} - +struct width_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if (is_negative(value)) handler_.on_error("negative width"); + if (is_negative(value)) report_error("negative width"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("width is not integer"); + report_error("width is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template class precision_checker { - public: - explicit FMT_CONSTEXPR precision_checker(ErrorHandler& eh) : handler_(eh) {} - +struct precision_checker { template ::value)> FMT_CONSTEXPR auto operator()(T value) -> unsigned long long { - if (is_negative(value)) handler_.on_error("negative precision"); + if (is_negative(value)) report_error("negative precision"); return static_cast(value); } template ::value)> FMT_CONSTEXPR auto operator()(T) -> unsigned long long { - handler_.on_error("precision is not integer"); + report_error("precision is not integer"); return 0; } - - private: - ErrorHandler& handler_; }; -template