diff --git a/.travis.yml b/.travis.yml index ed51852..a8aa4d5 100644 --- a/.travis.yml +++ b/.travis.yml @@ -16,7 +16,7 @@ matrix: # - env: RAKAU_BUILD="osx_debug" SPLIT_TEST_NUM="1" TEST_NSPLIT="2" # os: osx # osx_image: xcode10.1 - - env: RAKAU_BUILD="gcc7_debug" SPLIT_TEST_NUM="0" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug" SPLIT_TEST_NUM="0" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -25,7 +25,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug" SPLIT_TEST_NUM="1" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug" SPLIT_TEST_NUM="1" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -34,7 +34,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug" SPLIT_TEST_NUM="2" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug" SPLIT_TEST_NUM="2" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -43,7 +43,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug_nosimd" SPLIT_TEST_NUM="0" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug" SPLIT_TEST_NUM="3" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -52,7 +52,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug_nosimd" SPLIT_TEST_NUM="1" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug" SPLIT_TEST_NUM="4" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -61,7 +61,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug_nosimd" SPLIT_TEST_NUM="2" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug_nosimd" SPLIT_TEST_NUM="0" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -70,7 +70,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug_native" SPLIT_TEST_NUM="0" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug_nosimd" SPLIT_TEST_NUM="1" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -79,7 +79,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug_native" SPLIT_TEST_NUM="1" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug_nosimd" SPLIT_TEST_NUM="2" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -88,7 +88,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug_native" SPLIT_TEST_NUM="2" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug_nosimd" SPLIT_TEST_NUM="3" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -97,7 +97,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug_native_norsqrt" SPLIT_TEST_NUM="0" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug_nosimd" SPLIT_TEST_NUM="4" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -106,7 +106,7 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug_native_norsqrt" SPLIT_TEST_NUM="1" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug_native" SPLIT_TEST_NUM="0" TEST_NSPLIT="5" compiler: gcc os: linux addons: @@ -115,7 +115,79 @@ matrix: - ubuntu-toolchain-r-test packages: - g++-7 - - env: RAKAU_BUILD="gcc7_debug_native_norsqrt" SPLIT_TEST_NUM="2" TEST_NSPLIT="3" + - env: RAKAU_BUILD="gcc7_debug_native" SPLIT_TEST_NUM="1" TEST_NSPLIT="5" + compiler: gcc + os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-7 + - env: RAKAU_BUILD="gcc7_debug_native" SPLIT_TEST_NUM="2" TEST_NSPLIT="5" + compiler: gcc + os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-7 + - env: RAKAU_BUILD="gcc7_debug_native" SPLIT_TEST_NUM="3" TEST_NSPLIT="5" + compiler: gcc + os: linux + addons: + 
apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-7 + - env: RAKAU_BUILD="gcc7_debug_native" SPLIT_TEST_NUM="4" TEST_NSPLIT="5" + compiler: gcc + os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-7 + - env: RAKAU_BUILD="gcc7_debug_native_norsqrt" SPLIT_TEST_NUM="0" TEST_NSPLIT="5" + compiler: gcc + os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-7 + - env: RAKAU_BUILD="gcc7_debug_native_norsqrt" SPLIT_TEST_NUM="1" TEST_NSPLIT="5" + compiler: gcc + os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-7 + - env: RAKAU_BUILD="gcc7_debug_native_norsqrt" SPLIT_TEST_NUM="2" TEST_NSPLIT="5" + compiler: gcc + os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-7 + - env: RAKAU_BUILD="gcc7_debug_native_norsqrt" SPLIT_TEST_NUM="3" TEST_NSPLIT="5" + compiler: gcc + os: linux + addons: + apt: + sources: + - ubuntu-toolchain-r-test + packages: + - g++-7 + - env: RAKAU_BUILD="gcc7_debug_native_norsqrt" SPLIT_TEST_NUM="4" TEST_NSPLIT="5" compiler: gcc os: linux addons: diff --git a/CMakeLists.txt b/CMakeLists.txt index b30274a..bc641f9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,19 +1,21 @@ cmake_minimum_required(VERSION 3.3.0) -project(rakau VERSION 0.1 LANGUAGES CXX C) - -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/yacma") - -message(STATUS "System name: ${CMAKE_SYSTEM_NAME}") -message(STATUS "rakau version: ${rakau_VERSION}") - # Set default build type to "Release". +# NOTE: this should be done before the project command since the latter can set +# CMAKE_BUILD_TYPE itself (it does so for nmake). if(NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE) endif() +project(rakau VERSION 0.1 LANGUAGES CXX C) + +list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" "${CMAKE_CURRENT_SOURCE_DIR}/cmake/yacma") + +message(STATUS "System name: ${CMAKE_SYSTEM_NAME}") +message(STATUS "rakau version: ${rakau_VERSION}") + # The build options. option(RAKAU_BUILD_TESTS "Build unit tests." OFF) option(RAKAU_BUILD_BENCHMARKS "Build benchmarks." OFF) @@ -104,6 +106,28 @@ find_package(TBB REQUIRED) # Threading setup. include(RakauFindThreads) +# NOTE: on Unix systems, the correct library installation path +# could be something other than just "lib", such as "lib64", +# "lib32", etc., depending on platform/configuration. Apparently, +# CMake provides this information via the GNUInstallDirs module. +# Let's enable this for now on all Unixes except OSX. +# NOTE: potentially, this could be applicable to Cygwin as well. +# +# https://cmake.org/cmake/help/v3.15/module/GNUInstallDirs.html +# https://cmake.org/pipermail/cmake/2013-July/055375.html +if(UNIX AND NOT APPLE) + include(GNUInstallDirs) + set(_RAKAU_INSTALL_LIBDIR_DEFAULT "${CMAKE_INSTALL_LIBDIR}") +else() + set(_RAKAU_INSTALL_LIBDIR_DEFAULT "lib") +endif() +if(NOT RAKAU_INSTALL_LIBDIR) + set(RAKAU_INSTALL_LIBDIR "${_RAKAU_INSTALL_LIBDIR_DEFAULT}" CACHE STRING + "Library installation directory." FORCE) +endif() +mark_as_advanced(RAKAU_INSTALL_LIBDIR) +message(STATUS "Library installation directory: ${RAKAU_INSTALL_LIBDIR}") + # Initial setup of the rakau library. 
if(RAKAU_WITH_ROCM) add_library(rakau SHARED "${CMAKE_CURRENT_SOURCE_DIR}/src/rakau_rocm.cpp") @@ -185,20 +209,20 @@ configure_file("${CMAKE_CURRENT_SOURCE_DIR}/config.hpp.in" "${CMAKE_CURRENT_BINA set(_RAKAU_CONFIG_OPTIONAL_DEPS) configure_file("${CMAKE_CURRENT_SOURCE_DIR}/rakau-config.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/rakau-config.cmake" @ONLY) if(RAKAU_WITH_ROCM OR RAKAU_WITH_CUDA) - install(TARGETS rakau EXPORT rakau_export LIBRARY DESTINATION "lib") + install(TARGETS rakau EXPORT rakau_export LIBRARY DESTINATION "${RAKAU_INSTALL_LIBDIR}") else() install(TARGETS rakau EXPORT rakau_export) endif() -install(FILES "${CMAKE_CURRENT_BINARY_DIR}/rakau-config.cmake" DESTINATION "lib/cmake/rakau") -install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cmake/RakauFindBoost.cmake" DESTINATION "lib/cmake/rakau") -install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cmake/FindTBB.cmake" DESTINATION "lib/cmake/rakau") -install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cmake/RakauFindThreads.cmake" DESTINATION "lib/cmake/rakau") -install(EXPORT rakau_export NAMESPACE rakau:: DESTINATION lib/cmake/rakau) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/rakau-config.cmake" DESTINATION "${RAKAU_INSTALL_LIBDIR}/cmake/rakau") +install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cmake/RakauFindBoost.cmake" DESTINATION "${RAKAU_INSTALL_LIBDIR}/cmake/rakau") +install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cmake/FindTBB.cmake" DESTINATION "${RAKAU_INSTALL_LIBDIR}/cmake/rakau") +install(FILES "${CMAKE_CURRENT_SOURCE_DIR}/cmake/RakauFindThreads.cmake" DESTINATION "${RAKAU_INSTALL_LIBDIR}/cmake/rakau") +install(EXPORT rakau_export NAMESPACE rakau:: DESTINATION ${RAKAU_INSTALL_LIBDIR}/cmake/rakau) # Take care of versioning. include(CMakePackageConfigHelpers) write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/rakau-config-version.cmake" VERSION ${rakau_VERSION} COMPATIBILITY SameMajorVersion) -install(FILES "${CMAKE_CURRENT_BINARY_DIR}/rakau-config-version.cmake" DESTINATION "lib/cmake/rakau") +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/rakau-config-version.cmake" DESTINATION "${RAKAU_INSTALL_LIBDIR}/cmake/rakau") unset(_RAKAU_CONFIG_OPTIONAL_DEPS) # Installation of the header files. diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index ea67ee0..e3820ac 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -12,6 +12,7 @@ endfunction() ADD_RAKAU_BENCHMARK(benchmark_acc) ADD_RAKAU_BENCHMARK(benchmark_acc_pot) +ADD_RAKAU_BENCHMARK(benchmark_coll) ADD_RAKAU_BENCHMARK(benchmark_pot) ADD_RAKAU_BENCHMARK(benchmark_move) ADD_RAKAU_BENCHMARK(benchmark_leapfrog) diff --git a/benchmark/benchmark_coll.cpp b/benchmark/benchmark_coll.cpp new file mode 100644 index 0000000..8e000f5 --- /dev/null +++ b/benchmark/benchmark_coll.cpp @@ -0,0 +1,71 @@ +// Copyright 2018 Francesco Biscani (bluescarni@gmail.com) +// +// This file is part of the rakau library. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+
+#include <iostream>
+#include <numeric>
+#include <optional>
+#include <string>
+#include <tuple>
+#include <vector>
+
+#include <tbb/task_scheduler_init.h>
+
+#include <rakau/tree.hpp>
+
+#include "common.hpp"
+
+using namespace rakau;
+using namespace rakau_benchmark;
+
+int main(int argc, char **argv)
+{
+    std::cout.precision(20);
+
+    const auto popts = parse_coll_benchmark_options(argc, argv);
+
+    std::optional<tbb::task_scheduler_init> t_init;
+    if (std::get<2>(popts)) {
+        t_init.emplace(std::get<2>(popts));
+    }
+
+    auto runner = [&popts](auto x) {
+        using fp_type = decltype(x);
+
+        const auto [nparts, max_leaf_n, _1, bsize, a, parinit, _2, ordered, psize] = popts;
+
+        auto parts = get_plummer_sphere(nparts, static_cast<fp_type>(a), static_cast<fp_type>(bsize), parinit);
+        const std::vector<fp_type> aabb_sizes(nparts, psize);
+
+        octree<fp_type> t{kwargs::x_coords = parts.data() + nparts,
+                          kwargs::y_coords = parts.data() + 2 * nparts,
+                          kwargs::z_coords = parts.data() + 3 * nparts,
+                          kwargs::masses = parts.data(),
+                          kwargs::nparts = nparts,
+                          kwargs::max_leaf_n = max_leaf_n};
+
+        std::cout << t << '\n';
+
+        decltype(t.compute_cgraph_o(aabb_sizes.data())) cgraph;
+        if (ordered) {
+            cgraph = t.compute_cgraph_o(aabb_sizes.data());
+        } else {
+            cgraph = t.compute_cgraph_u(aabb_sizes.data());
+        }
+
+        const auto acc = std::accumulate(cgraph.begin(), cgraph.end(), 0ull,
+                                         [](auto cur, const auto &c) { return cur + c.size(); });
+
+        std::cout << "Total number of collisions detected: " << acc / 2u << '\n';
+    };
+
+    if (std::get<6>(popts) == "float") {
+        runner(0.f);
+    } else {
+        runner(0.);
+    }
+}
diff --git a/benchmark/common.hpp b/benchmark/common.hpp
index 688191c..71cc824 100644
--- a/benchmark/common.hpp
+++ b/benchmark/common.hpp
@@ -228,6 +228,61 @@ inline auto parse_accpot_benchmark_options(int argc, char **argv)
         ordered};
 }
 
+inline auto parse_coll_benchmark_options(int argc, char **argv)
+{
+    namespace po = boost::program_options;
+
+    unsigned long nparts;
+    unsigned max_leaf_n, nthreads;
+    double bsize, a, psize;
+    bool parinit = false;
+    std::string fp_type;
+    bool ordered = false;
+
+    po::options_description desc("Allowed options");
+    desc.add_options()("help", "produce help message")(
+        "nparts", po::value(&nparts)->default_value(1'000'000ul), "number of particles")(
+        "max_leaf_n", po::value(&max_leaf_n)->default_value(rakau::default_max_leaf_n),
+        "max number of particles in a leaf node")("a", po::value(&a)->default_value(1.), "Plummer core radius")(
+        "bsize", po::value(&bsize)->default_value(0.),
+        "size of the domain (if 0, it is automatically deduced)")(
+        "psize", po::value(&psize)->default_value(1E-3),
+        "particle size")("nthreads", po::value(&nthreads)->default_value(0u),
+                         "number of threads to use (0 for auto-detection)")(
+        "parinit", "parallel nondeterministic initialisation of the particle distribution")(
+        "fp_type", po::value(&fp_type)->default_value("float"),
+        "floating-point type to use in the computations")(
+        "ordered", "compute the collision graph using the original particle order");
+
+    po::variables_map vm;
+    po::store(po::parse_command_line(argc, argv, desc), vm);
+    po::notify(vm);
+
+    if (vm.count("help")) {
+        std::cout << desc << "\n";
+        std::exit(0);
+    }
+
+    if (nparts == 0u) {
+        throw std::invalid_argument("The number of particles cannot be zero");
+    }
+
+    if (vm.count("parinit")) {
+        parinit = true;
+    }
+
+    if (fp_type != "float" && fp_type != "double") {
+        throw std::invalid_argument("Only the 'float' and 'double' floating-point types are supported, but the type '"
+                                    + fp_type + "' was specified instead");
+    }
+
+    if (vm.count("ordered")) {
+        ordered = true;
+    }
+
+    return std::tuple{nparts, max_leaf_n,
nthreads, bsize, a, parinit, std::move(fp_type), ordered, psize}; +} + } // namespace rakau_benchmark #endif diff --git a/cmake/FindTBB.cmake b/cmake/FindTBB.cmake index ca28351..b1a3d88 100644 --- a/cmake/FindTBB.cmake +++ b/cmake/FindTBB.cmake @@ -1,546 +1,421 @@ -#.rst: -# FindTBB -# ------- +# - Find ThreadingBuildingBlocks include dirs and libraries +# Use this module by invoking find_package with the form: +# find_package(TBB +# [REQUIRED] # Fail with error if TBB is not found +# ) # +# Once done, this will define +# +# TBB_FOUND - system has TBB +# TBB_INCLUDE_DIRS - the TBB include directories +# TBB_LIBRARIES - TBB libraries to be lined, doesn't include malloc or +# malloc proxy +# TBB::tbb - imported target for the TBB library +# +# TBB_VERSION_MAJOR - Major Product Version Number +# TBB_VERSION_MINOR - Minor Product Version Number +# TBB_INTERFACE_VERSION - Engineering Focused Version Number +# TBB_COMPATIBLE_INTERFACE_VERSION - The oldest major interface version +# still supported. This uses the engineering +# focused interface version numbers. +# +# TBB_MALLOC_FOUND - system has TBB malloc library +# TBB_MALLOC_INCLUDE_DIRS - the TBB malloc include directories +# TBB_MALLOC_LIBRARIES - The TBB malloc libraries to be lined +# TBB::malloc - imported target for the TBB malloc library +# +# TBB_MALLOC_PROXY_FOUND - system has TBB malloc proxy library +# TBB_MALLOC_PROXY_INCLUDE_DIRS = the TBB malloc proxy include directories +# TBB_MALLOC_PROXY_LIBRARIES - The TBB malloc proxy libraries to be lined +# TBB::malloc_proxy - imported target for the TBB malloc proxy library # -# Find Intel's Threading Building Blocks (TBB) include path and libraries. # # This module reads hints about search locations from variables: -# -# :: -# -# TBB_ROOT - Root directory of pre-built TBB package. -# Can be an environment variable instead. It is -# derived from the found TBB_INCLUDE_DIR if unset. -# TBB_ARCH_PLATFORM - Environment variable which can be used to specify -# architecture and platform specific library path -# suffix (excluding "/lib/" suffix or prefix). -# For MSVC, the appropriate link library path of the -# official pre-built download package from the TBB -# web site is chosen by this module. The path suffix -# derived from this variable takes precedence. -# -# This module considers the following CMake variables set by find_package: -# -# :: -# -# TBB_FIND_COMPONENTS - Case-insensitive names of requested libraries: -# tbb, [tbb]malloc, [tbb]malloc_proxy -# TBB_FIND_REQUIRED_ - Whether TBB library component is required. -# TBB is considered to be not found when at least -# one required library or its include path is missing. -# When no TBB_FIND_COMPONENTS are specified, only the -# threading library "tbb" is required. -# TBB_FIND_REQUIRED - Raise FATAL_ERROR when required components not found. -# TBB_FIND_QUIETLY - Suppress all other (status) messages. -# -# The TBB_DEBUG variable can be set to TRUE before find_package(TBB) to -# enable verbose output which helps to debug the processing of this module: -# -# :: -# -# set(TBB_DEBUG TRUE) -# find_package(TBB) -# -# This module defines the following variables: -# -# :: -# -# TBB_FOUND - Whether TBB libraries were found. -# TBB_INCLUDE_DIR - TBB library include path where tbb/tbb_stddef.h is located. -# Used as HINTS for find_path of TBB__INCLUDE_DIR. -# TBB_INCLUDE_DIRS - Include paths of found TBB libraries. -# TBB_LIBRARIES - File paths of found TBB libraries. -# TBB_VERSION - Version for use in VERSION_LESS et al. comparisons. 
-# TBB_VERSION_MAJOR - Major library version number. -# TBB_VERSION_MINOR - Minor library version number. -# TBB_VERSION_STRING - Version string for output messages. -# TBB_INTERFACE_VERSION - API version number. -# TBB_COMPATIBLE_INTERFACE_VERSION - The oldest major version still supported. -# -# Additionally, for each requested component, this module defines the following variables: -# -# :: -# -# TBB_TBB_FOUND - Whether TBB threading library was found. -# TBB_TBB_INCLUDE_DIR - Include path of TBB threading library. -# TBB_TBB_INCLUDE_DIRS - Include paths for use of TBB library. -# TBB_TBB_LIBRARIES - TBB threading library and transitive link dependencies. -# TBB_TBB_LIBRARY_RELEASE - File path of optimized TBB link library. -# TBB_TBB_LIBRARY_DEBUG - File path of TBB link library with debug symbols. -# TBB_TBB_LIBRARY - File paths of both "optimized" and "debug" TBB threading link libraries. -# When only one of these is found, this variable is set to either -# TBB_TBB_LIBRARY_RELEASE or TBB_TBB_LIBRARY_DEBUG. -# -# TBB_MALLOC_FOUND - Whether TBB malloc library was found. -# TBB_MALLOC_INCLUDE_DIR - Include path of TBB malloc library. -# TBB_MALLOC_INCLUDE_DIRS - Include paths for use of TBB malloc library. -# TBB_MALLOC_LIBRARIES - TBB malloc library and transitive link dependencies. -# TBB_MALLOC_LIBRARY_RELEASE - File path of optimized TBB malloc link library. -# TBB_MALLOC_LIBRARY_DEBUG - File path of TBB malloc link library with debug symbols. -# TBB_MALLOC_LIBRARY - File paths of both "optimized" and "debug" TBB malloc link libraries. -# When only one of these is found, this variable is set to either -# TBB_MALLOC_LIBRARY_RELEASE or TBB_MALLOC_LIBRARY_DEBUG. -# -# TBB_MALLOC_PROXY_FOUND - Whether TBB malloc proxy library was found. -# TBB_MALLOC_PROXY_INCLUDE_DIR - Include path of TBB malloc proxy library. -# TBB_MALLOC_PROXY_INCLUDE_DIRS - Include paths for use of TBB malloc proxy library. -# TBB_MALLOC_PROXY_LIBRARIES - TBB malloc proxy library and transitive link dependencies. -# TBB_MALLOC_PROXY_LIBRARY_RELEASE - File path of optimized TBB malloc proxy link library. -# TBB_MALLOC_PROXY_LIBRARY_DEBUG - File path of TBB malloc proxy link library with debug symbols. -# TBB_MALLOC_PROXY_LIBRARY - File paths of both "optimized" and "debug" TBB malloc proxy link libraries. -# When only one of these is found, this variable is set to either -# TBB_MALLOC_PROXY_LIBRARY_RELEASE or TBB_MALLOC_PROXY_LIBRARY_DEBUG. -# -# of these, the following variables are added as advanced cache entries: -# -# :: -# -# TBB_INCLUDE_DIR -# TBB__INCLUDE_DIR -# TBB__LIBRARY_RELEASE -# TBB__LIBRARY_DEBUG -# -# This module further defines the following import targets with the IMPORTED and INTERFACE -# properties set appropriately such that only a target_link_libraries command is required -# to declare the dependency of another target on the respective TBB library component. -# The use of these import targets instead of above defined variables is recommended. -# -# :: -# -# TBB::tbb - TBB threading library. -# TBB::malloc - TBB malloc library. -# TBB::malloc_proxy - TBB malloc proxy library. 
-# -# Example usage: -# -# :: -# -# find_package(TBB REQUIRED COMPONENTS tbb OPTIONAL_COMPONENTS malloc) -# -# add_executable(foo foo.cc) -# target_link_libraries(foo TBB::tbb) -# if (TARGET TBB::malloc) -# # or if (TBB_MALLOC_FOUND) -# target_link_libraries(foo TBB::malloc) -# endif () -# -# This module was written by Andreas Schuh for CMake BASIS with inspiration -# from the FindTBB module which was originally part of the Object-oriented -# Graphics Rendering Engine (OGRE) project with modifications by Robert Maynard. - -# Copyright (c) 2011-2012 University of Pennsylvania -# Copyright (c) 2013-2014 Carnegie Mellon University -# Copyright (c) 2013-2016 Andreas Schuh -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# 1. Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# 2. Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR -# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -if (NOT TBB_FIND_QUIETLY) - set(_TBB_FIND_STATUS "Looking for TBB") - if (TBB_FIND_COMPONENTS) - set(_TBB_FIND_STATUS "${_TBB_FIND_STATUS} [${TBB_FIND_COMPONENTS}]") +# ENV TBB_ARCH_PLATFORM - for eg. set it to "mic" for Xeon Phi builds +# ENV TBB_ROOT or just TBB_ROOT - root directory of tbb installation +# ENV TBB_BUILD_PREFIX - specifies the build prefix for user built tbb +# libraries. Should be specified with ENV TBB_ROOT +# and optionally... +# ENV TBB_BUILD_DIR - if build directory is different than ${TBB_ROOT}/build +# +# +# Modified by Robert Maynard from the original OGRE source +# +#------------------------------------------------------------------- +# This file is part of the CMake build system for OGRE +# (Object-oriented Graphics Rendering Engine) +# For the latest info, see http://www.ogre3d.org/ +# +# The contents of this file are placed in the public domain. Feel +# free to make use of it in any way you like. +#------------------------------------------------------------------- +# +#============================================================================= +# Copyright 2010-2012 Kitware, Inc. +# Copyright 2012 Rolf Eike Beer +# +# Distributed under the OSI-approved BSD License (the "License"); +# see accompanying file Copyright.txt for details. +# +# This software is distributed WITHOUT ANY WARRANTY; without even the +# implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. +# See the License for more information. 
+#============================================================================= +# (To distribute this file outside of CMake, substitute the full +# License text for the above reference.) + + +#============================================================================= +# FindTBB helper functions and macros +# + +#==================================================== +# Fix the library path in case it is a linker script +#==================================================== +function(tbb_extract_real_library library real_library) + if(NOT UNIX OR NOT EXISTS ${library}) + set(${real_library} "${library}" PARENT_SCOPE) + return() + endif() + + #Read in the first 4 bytes and see if they are the ELF magic number + set(_elf_magic "7f454c46") + file(READ ${library} _hex_data OFFSET 0 LIMIT 4 HEX) + if(_hex_data STREQUAL _elf_magic) + #we have opened a elf binary so this is what + #we should link to + set(${real_library} "${library}" PARENT_SCOPE) + return() + endif() + + file(READ ${library} _data OFFSET 0 LIMIT 1024) + if("${_data}" MATCHES "INPUT \\(([^(]+)\\)") + #extract out the .so name from REGEX MATCH command + set(_proper_so_name "${CMAKE_MATCH_1}") + + #construct path to the real .so which is presumed to be in the same directory + #as the input file + get_filename_component(_so_dir "${library}" DIRECTORY) + set(${real_library} "${_so_dir}/${_proper_so_name}" PARENT_SCOPE) + else() + #unable to determine what this library is so just hope everything works + #and pass it unmodified. + set(${real_library} "${library}" PARENT_SCOPE) + endif() +endfunction() + +#=============================================== +# Do the final processing for the package find. +#=============================================== +macro(findpkg_finish PREFIX TARGET_NAME) + # skip if already processed during this run + if (NOT ${PREFIX}_FOUND) + if (${PREFIX}_INCLUDE_DIR AND ${PREFIX}_LIBRARY) + set(${PREFIX}_FOUND TRUE) + set (${PREFIX}_INCLUDE_DIRS ${${PREFIX}_INCLUDE_DIR}) + set (${PREFIX}_LIBRARIES ${${PREFIX}_LIBRARY}) + else () + if (${PREFIX}_FIND_REQUIRED AND NOT ${PREFIX}_FIND_QUIETLY) + message(FATAL_ERROR "Required library ${PREFIX} not found.") + endif () + endif () + + if (NOT TARGET "TBB::${TARGET_NAME}") + if (${PREFIX}_LIBRARY_RELEASE) + tbb_extract_real_library(${${PREFIX}_LIBRARY_RELEASE} real_release) + endif () + if (${PREFIX}_LIBRARY_DEBUG) + tbb_extract_real_library(${${PREFIX}_LIBRARY_DEBUG} real_debug) + endif () + add_library(TBB::${TARGET_NAME} UNKNOWN IMPORTED) + set_target_properties(TBB::${TARGET_NAME} PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${${PREFIX}_INCLUDE_DIR}") + if (${PREFIX}_LIBRARY_DEBUG AND ${PREFIX}_LIBRARY_RELEASE) + set_target_properties(TBB::${TARGET_NAME} PROPERTIES + IMPORTED_LOCATION "${real_release}" + IMPORTED_LOCATION_DEBUG "${real_debug}" + IMPORTED_LOCATION_RELEASE "${real_release}") + elseif (${PREFIX}_LIBRARY_RELEASE) + set_target_properties(TBB::${TARGET_NAME} PROPERTIES + IMPORTED_LOCATION "${real_release}") + elseif (${PREFIX}_LIBRARY_DEBUG) + set_target_properties(TBB::${TARGET_NAME} PROPERTIES + IMPORTED_LOCATION "${real_debug}") + endif () + endif () + + #mark the following variables as internal variables + mark_as_advanced(${PREFIX}_INCLUDE_DIR + ${PREFIX}_LIBRARY + ${PREFIX}_LIBRARY_DEBUG + ${PREFIX}_LIBRARY_RELEASE) endif () - if (NOT TBB_FIND_REQUIRED) - set(_TBB_FIND_STATUS "${_TBB_FIND_STATUS} (optional)") +endmacro() + +#=============================================== +# Generate debug names from given release names 
+#=============================================== +macro(get_debug_names PREFIX) + foreach(i ${${PREFIX}}) + set(${PREFIX}_DEBUG ${${PREFIX}_DEBUG} ${i}d ${i}D ${i}_d ${i}_D ${i}_debug ${i}) + endforeach() +endmacro() + +#=============================================== +# See if we have env vars to help us find tbb +#=============================================== +macro(getenv_path VAR) + set(ENV_${VAR} $ENV{${VAR}}) + # replace won't work if var is blank + if (ENV_${VAR}) + string( REGEX REPLACE "\\\\" "/" ENV_${VAR} ${ENV_${VAR}} ) + endif () +endmacro() + +#=============================================== +# Couple a set of release AND debug libraries +#=============================================== +macro(make_library_set PREFIX) + if (${PREFIX}_RELEASE AND ${PREFIX}_DEBUG) + set(${PREFIX} optimized ${${PREFIX}_RELEASE} debug ${${PREFIX}_DEBUG}) + elseif (${PREFIX}_RELEASE) + set(${PREFIX} ${${PREFIX}_RELEASE}) + elseif (${PREFIX}_DEBUG) + set(${PREFIX} ${${PREFIX}_DEBUG}) endif () - message(STATUS "${_TBB_FIND_STATUS}...") -endif () +endmacro() -# ------------------------------------------------------------------------------ -# Default required/optional components -if (NOT TBB_FIND_COMPONENTS) - set(TBB_FIND_COMPONENTS tbb malloc malloc_proxy) - set(TBB_FIND_REQUIRED_tbb TRUE) - set(TBB_FIND_REQUIRED_malloc FALSE) - set(TBB_FIND_REQUIRED_malloc_proxy FALSE) -endif () -# ------------------------------------------------------------------------------ -# Normalize component names -set(_TBB_FIND_COMPONENTS) -foreach (__TBB_COMPONENT IN LISTS TBB_FIND_COMPONENTS) - string(TOUPPER "${__TBB_COMPONENT}" _TBB_COMPONENT) - string(REGEX REPLACE "^TBB_?([A-Z_]+)$" "\\1" _TBB_COMPONENT "${_TBB_COMPONENT}") - if (_TBB_COMPONENT MATCHES "^(TBB|MALLOC|MALLOC_PROXY)$") - set(_TBB_${_TBB_COMPONENT}_NAME ${__TBB_COMPONENT}) - list(APPEND _TBB_FIND_COMPONENTS ${_TBB_COMPONENT}) - if (TBB_FIND_REQUIRED_${__TBB_COMPONENT}) - set(_TBB_FIND_REQUIRED_${_TBB_COMPONENT} TRUE) - else () - set(_TBB_FIND_REQUIRED_${_TBB_COMPONENT} FALSE) - endif () - else () - message(FATAL_ERROR "Unknown TBB library component: ${__TBB_COMPONENT}\n" - "Valid component names are: tbb, [tbb]malloc, [tbb]malloc_proxy") +#============================================================================= +# Now to actually find TBB +# + +# Get path, convert backslashes as ${ENV_${var}} +getenv_path(TBB_ROOT) + +# initialize search paths +set(TBB_PREFIX_PATH ${TBB_ROOT} ${ENV_TBB_ROOT}) +set(TBB_INC_SEARCH_PATH "") +set(TBB_LIB_SEARCH_PATH "") + + +# If user built from sources +set(TBB_BUILD_PREFIX $ENV{TBB_BUILD_PREFIX}) +if (TBB_BUILD_PREFIX AND ENV_TBB_ROOT) + getenv_path(TBB_BUILD_DIR) + if (NOT ENV_TBB_BUILD_DIR) + set(ENV_TBB_BUILD_DIR ${ENV_TBB_ROOT}/build) endif () -endforeach () -unset(__TBB_COMPONENT) -if (TBB_DEBUG) - message("** FindTBB: Components = [${_TBB_FIND_COMPONENTS}]") + # include directory under ${ENV_TBB_ROOT}/include + list(APPEND TBB_LIB_SEARCH_PATH + ${ENV_TBB_BUILD_DIR}/${TBB_BUILD_PREFIX}_release + ${ENV_TBB_BUILD_DIR}/${TBB_BUILD_PREFIX}_debug) endif () -# ------------------------------------------------------------------------------ -# Names of headers and libraries for each component -set(_TBB_TBB_LIB_NAMES_RELEASE tbb) -set(_TBB_TBB_LIB_NAMES_DEBUG tbb_debug) -set(_TBB_TBB_INC_NAMES tbb/tbb.h) - -set(_TBB_MALLOC_LIB_NAMES_RELEASE tbbmalloc) -set(_TBB_MALLOC_LIB_NAMES_DEBUG tbbmalloc_debug) -set(_TBB_MALLOC_INC_NAMES tbb/tbb.h) - -set(_TBB_MALLOC_PROXY_LIB_NAMES_RELEASE tbbmalloc_proxy) 
-set(_TBB_MALLOC_PROXY_LIB_NAMES_DEBUG tbbmalloc_proxy_debug) -set(_TBB_MALLOC_PROXY_INC_NAMES tbb/tbbmalloc_proxy.h) - -# ------------------------------------------------------------------------------ -# Transitive link dependencies -set(_TBB_TBB_LIB_LINK_DEPENDS) -set(_TBB_MALLOC_LIB_LINK_DEPENDS) -set(_TBB_MALLOC_PROXY_LIB_LINK_DEPENDS) - -if (UNIX AND NOT APPLE) - # On Linux, the TBB threading library requires librt.so - list(APPEND _TBB_TBB_LIB_LINK_DEPENDS rt) -endif () -# ------------------------------------------------------------------------------ -# Construct a set of search paths -set(_TBB_ARCH_PLATFORM $ENV{TBB_ARCH_PLATFORM}) +# For Windows, let's assume that the user might be using the precompiled +# TBB packages from the main website. These use a rather awkward directory +# structure (at least for automatically finding the right files) depending +# on platform and compiler, but we'll do our best to accommodate it. +# Not adding the same effort for the precompiled linux builds, though. Those +# have different versions for CC compiler versions and linux kernels which +# will never adequately match the user's setup, so there is no feasible way +# to detect the "best" version to use. The user will have to manually +# select the right files. (Chances are the distributions are shipping their +# custom version of tbb, anyway, so the problem is probably nonexistent.) +if (WIN32 AND MSVC) + set(COMPILER_PREFIX "vc7.1") + if (MSVC_VERSION EQUAL 1400) + set(COMPILER_PREFIX "vc8") + elseif(MSVC_VERSION EQUAL 1500) + set(COMPILER_PREFIX "vc9") + elseif(MSVC_VERSION EQUAL 1600) + set(COMPILER_PREFIX "vc10") + elseif(MSVC_VERSION EQUAL 1700) + set(COMPILER_PREFIX "vc11") + elseif(MSVC_VERSION EQUAL 1800) + set(COMPILER_PREFIX "vc12") + elseif(MSVC_VERSION EQUAL 1900) + set(COMPILER_PREFIX "vc14") + endif () -if (NOT TBB_ROOT) - file(TO_CMAKE_PATH "$ENV{TBB_ROOT}" TBB_ROOT) + # for each prefix path, add ia32/64\${COMPILER_PREFIX}\lib to the lib search path + foreach (dir IN LISTS TBB_PREFIX_PATH) + if (CMAKE_CL_64) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/ia64/${COMPILER_PREFIX}/lib) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/ia64/${COMPILER_PREFIX}) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/intel64/${COMPILER_PREFIX}/lib) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/intel64/${COMPILER_PREFIX}) + else () + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/ia32/${COMPILER_PREFIX}/lib) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/ia32/${COMPILER_PREFIX}) + endif () + endforeach () endif () -set(_TBB_INC_PATH_SUFFIXES include) +# For OS X binary distribution, choose libc++ based libraries for Mavericks (10.9) +# and above and AppleClang +if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" AND + NOT CMAKE_SYSTEM_VERSION VERSION_LESS 13.0) + set (USE_LIBCXX OFF) + cmake_policy(GET CMP0025 POLICY_VAR) -set(_TBB_LIB_PATH_SUFFIXES) -if (_TBB_ARCH_PLATFORM) - list(APPEND _TBB_LIB_PATH_SUFFIXES lib/${_TBB_ARCH_PLATFORM}) - list(APPEND _TBB_LIB_PATH_SUFFIXES ${_TBB_ARCH_PLATFORM}/lib) -endif () -list(APPEND _TBB_LIB_PATH_SUFFIXES lib) - -if (WIN32 AND MSVC AND CMAKE_GENERATOR MATCHES "Visual Studio ([0-9]+)") - set(_TBB_MSVS_VERSION ${CMAKE_MATCH_1}) - if (CMAKE_CL_64) - list(APPEND _TBB_LIB_PATH_SUFFIXES lib/intel64/vc${_TBB_MSVS_VERSION}) - list(APPEND _TBB_LIB_PATH_SUFFIXES intel64/vc${_TBB_MSVS_VERSION}/lib) - list(APPEND _TBB_LIB_PATH_SUFFIXES lib/ia64/vc${_TBB_MSVS_VERSION}) - list(APPEND _TBB_LIB_PATH_SUFFIXES ia64/vc${_TBB_MSVS_VERSION}/lib) + if (POLICY_VAR STREQUAL "NEW") + if (CMAKE_CXX_COMPILER_ID STREQUAL 
"AppleClang") + set (USE_LIBCXX ON) + endif () else () - list(APPEND _TBB_LIB_PATH_SUFFIXES lib/ia32/vc${_TBB_MSVS_VERSION}) - list(APPEND _TBB_LIB_PATH_SUFFIXES ia32/vc${_TBB_MSVS_VERSION}/lib) + if (CMAKE_CXX_COMPILER_ID STREQUAL "Clang") + set (USE_LIBCXX ON) + endif () endif () - unset(_TBB_MSVS_VERSION) -endif () -if (TBB_DEBUG) - message("** FindTBB: Initial search paths:") - message("** FindTBB: - Root directory hints = [${TBB_ROOT}]") - message("** FindTBB: - Include path suffixes = [${_TBB_INC_PATH_SUFFIXES}]") - message("** FindTBB: - Library path suffixes = [${_TBB_LIB_PATH_SUFFIXES}]") -endif () - -# ------------------------------------------------------------------------------ -# Find common include directory -# -# Looking for tbb/tbb_stddef.h because we use this path later to read this file -# in order to extract the version information. The tbb.h header should be in the -# same directory and is searched for separately as part of the "tbb" and "malloc" -# component search. The TBB_INCLUDE_DIR is then used as HINTS. -find_path(TBB_INCLUDE_DIR - NAMES tbb/tbb_stddef.h - HINTS ${TBB_ROOT} - PATH_SUFFIXES ${_TBB_INC_PATH_SUFFIXES} -) - -mark_as_advanced(TBB_INCLUDE_DIR) - -# ------------------------------------------------------------------------------ -# Derive TBB_ROOT from TBB_INCLUDE_DIR if unset -if (TBB_INCLUDE_DIR AND NOT TBB_ROOT) - if (_TBB_INC_PATH_SUFFIXES MATCHES "[^/;]/[^/;]") - string(LENGTH "${TBB_INCLUDE_DIR}" _TBB_INCLUDE_DIR_LENGTH) - foreach (_TBB_INC_PATH_SUFFIX IN LISTS _TBB_INC_PATH_SUFFIXES) - string(LENGTH "${_TBB_INC_PATH_SUFFIX}" _TBB_INC_PATH_SUFFIX_LENGTH) - if (_TBB_INC_PATH_SUFFIX_LENGTH GREATER 0) - math(EXPR _TBB_SUBSTRING_START "${_TBB_INCLUDE_DIR_LENGTH} - ${_TBB_INC_PATH_SUFFIX_LENGTH}") - string(SUBSTRING "${TBB_INCLUDE_DIR}" _TBB_SUBSTRING_START -1 _TBB_SUBSTRING) - if (_TBB_SUBSTRING STREQUAL _TBB_INC_PATH_SUFFIX) - if (_TBB_SUBSTRING_START GREATER 0) - string(SUBSTRING "${TBB_INCLUDE_DIR}" 0 _TBB_SUBSTRING_START TBB_ROOT) - string(REGEX REPLACE "/+$" "" TBB_ROOT "${TBB_ROOT}") - else () - set(TBB_ROOT "/") - endif () - break() - endif () - endif () + if (USE_LIBCXX) + foreach (dir IN LISTS TBB_PREFIX_PATH) + list (APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/libc++ ${dir}/libc++/lib) endforeach () - unset(_TBB_SUBSTRING) - unset(_TBB_SUBSTRING_START) - unset(_TBB_INCLUDE_DIR_LENGTH) - unset(_TBB_INC_PATH_SUFFIX_LENGTH) - else () - get_filename_component(TBB_ROOT "${TBB_INCLUDE_DIR}" DIRECTORY) endif () endif () -if (TBB_DEBUG) - message("** FindTBB: After initial search of TBB include path") - message("** FindTBB: - TBB_INCLUDE_DIR = ${TBB_INCLUDE_DIR}") - message("** FindTBB: - TBB_ROOT = [${TBB_ROOT}]") +# check compiler ABI +if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU") + set(COMPILER_PREFIX) + if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.7) + list(APPEND COMPILER_PREFIX "gcc4.7") + endif() + if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 4.4) + list(APPEND COMPILER_PREFIX "gcc4.4") + endif() + list(APPEND COMPILER_PREFIX "gcc4.1") +elseif(CMAKE_CXX_COMPILER_ID MATCHES "Clang") + set(COMPILER_PREFIX) + if (NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 3.6) + list(APPEND COMPILER_PREFIX "gcc4.7") + endif() + list(APPEND COMPILER_PREFIX "gcc4.4") +else() # Assume compatibility with 4.4 for other compilers + list(APPEND COMPILER_PREFIX "gcc4.4") endif () -# ------------------------------------------------------------------------------ -# Find library components -set(TBB_INCLUDE_DIRS) -set(TBB_LIBRARIES) +# if platform architecture is explicitly 
specified +set(TBB_ARCH_PLATFORM $ENV{TBB_ARCH_PLATFORM}) +if (TBB_ARCH_PLATFORM) + foreach (dir IN LISTS TBB_PREFIX_PATH) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/${TBB_ARCH_PLATFORM}/lib) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/${TBB_ARCH_PLATFORM}) + endforeach () +endif () -foreach (_TBB_COMPONENT IN LISTS _TBB_FIND_COMPONENTS) - if (TBB_DEBUG) - message("** FindTBB: Looking for component ${_TBB_COMPONENT}...") - endif () +foreach (dir IN LISTS TBB_PREFIX_PATH) + foreach (prefix IN LISTS COMPILER_PREFIX) + if (CMAKE_SIZEOF_VOID_P EQUAL 8) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/intel64) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/intel64/${prefix}) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/intel64/lib) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/intel64/${prefix}/lib) + else () + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/ia32) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib/ia32/${prefix}) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/ia32/lib) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/ia32/${prefix}/lib) + endif () + endforeach() +endforeach () - # Find include path and library files of this component - find_path(TBB_${_TBB_COMPONENT}_INCLUDE_DIR - NAMES ${_TBB_${_TBB_COMPONENT}_INC_NAMES} - HINTS ${TBB_INCLUDE_DIR} ${TBB_ROOT} - PATH_SUFFIXES ${_TBB_INC_PATH_SUFFIXES} - ) - - find_library(TBB_${_TBB_COMPONENT}_LIBRARY_RELEASE - NAMES ${_TBB_${_TBB_COMPONENT}_LIB_NAMES_RELEASE} - HINTS ${TBB_ROOT} - PATH_SUFFIXES ${_TBB_LIB_PATH_SUFFIXES} - ) - - find_library(TBB_${_TBB_COMPONENT}_LIBRARY_DEBUG - NAMES ${_TBB_${_TBB_COMPONENT}_LIB_NAMES_DEBUG} - HINTS ${TBB_ROOT} - PATH_SUFFIXES ${_TBB_LIB_PATH_SUFFIXES} - ) - - if (TBB_DEBUG) - message("** FindTBB: - TBB_${_TBB_COMPONENT}_INCLUDE_DIR = ${TBB_${_TBB_COMPONENT}_INCLUDE_DIR}") - message("** FindTBB: - TBB_${_TBB_COMPONENT}_LIBRARY_RELEASE = ${TBB_${_TBB_COMPONENT}_LIBRARY_RELEASE}") - message("** FindTBB: - TBB_${_TBB_COMPONENT}_LIBRARY_DEBUG = ${TBB_${_TBB_COMPONENT}_LIBRARY_DEBUG}") - endif () +# add general search paths +foreach (dir IN LISTS TBB_PREFIX_PATH) + list(APPEND TBB_LIB_SEARCH_PATH ${dir}/lib ${dir}/Lib ${dir}/lib/tbb + ${dir}/Libs) + list(APPEND TBB_INC_SEARCH_PATH ${dir}/include ${dir}/Include + ${dir}/include/tbb) +endforeach () - # Mark cache entries as advanced - mark_as_advanced(TBB_${_TBB_COMPONENT}_INCLUDE_DIR) - mark_as_advanced(TBB_${_TBB_COMPONENT}_LIBRARY_RELEASE) - mark_as_advanced(TBB_${_TBB_COMPONENT}_LIBRARY_DEBUG) - - # Set TBB__LIBRARY - if (TBB_${_TBB_COMPONENT}_LIBRARY_RELEASE AND TBB_${_TBB_COMPONENT}_LIBRARY_DEBUG) - set(TBB_${_TBB_COMPONENT}_LIBRARY - optimized ${TBB_${_TBB_COMPONENT}_LIBRARY_RELEASE} - debug ${TBB_${_TBB_COMPONENT}_LIBRARY_DEBUG} - ) - elseif (TBB_${_TBB_COMPONENT}_LIBRARY_RELEASE) - set(TBB_${_TBB_COMPONENT}_LIBRARY ${TBB_${_TBB_COMPONENT}_LIBRARY_RELEASE}) - elseif (TBB_${_TBB_COMPONENT}_LIBRARY_DEBUG) - set(TBB_${_TBB_COMPONENT}_LIBRARY ${TBB_${_TBB_COMPONENT}_LIBRARY_DEBUG}) - else () - set(TBB_${_TBB_COMPONENT}_LIBRARY TBB_${_TBB_COMPONENT}_LIBRARY-NOTFOUND) - endif () +set(TBB_LIBRARY_NAMES tbb) +get_debug_names(TBB_LIBRARY_NAMES) - # Set TBB__FOUND - if (TBB_${_TBB_COMPONENT}_INCLUDE_DIR AND TBB_${_TBB_COMPONENT}_LIBRARY) - set(TBB_${_TBB_COMPONENT}_FOUND TRUE) - else () - set(TBB_${_TBB_COMPONENT}_FOUND FALSE) - endif () - set(TBB_${_TBB_${_TBB_COMPONENT}_NAME}_FOUND ${TBB_${_TBB_COMPONENT}_FOUND}) - if (TBB_${_TBB_COMPONENT}_FOUND) +find_path(TBB_INCLUDE_DIR + NAMES tbb/tbb.h + PATHS ${TBB_INC_SEARCH_PATH}) + +find_library(TBB_LIBRARY_RELEASE + NAMES ${TBB_LIBRARY_NAMES} + 
PATHS ${TBB_LIB_SEARCH_PATH}) +find_library(TBB_LIBRARY_DEBUG + NAMES ${TBB_LIBRARY_NAMES_DEBUG} + PATHS ${TBB_LIB_SEARCH_PATH}) +make_library_set(TBB_LIBRARY) + +findpkg_finish(TBB tbb) + +#if we haven't found TBB no point on going any further +if (NOT TBB_FOUND) + return() +endif () - # Add transitive dependencies - set(TBB_${_TBB_COMPONENT}_INCLUDE_DIRS ${TBB_${_TBB_COMPONENT}_INCLUDE_DIR}) - set(TBB_${_TBB_COMPONENT}_LIBRARIES ${TBB_${_TBB_COMPONENT}_LIBRARY}) - if (_TBB_${_TBB_COMPONENT}_LIB_LINK_DEPENDS) - list(APPEND TBB_${_TBB_COMPONENT}_LIBRARIES "${_TBB_${_TBB_COMPONENT}_LIB_LINK_DEPENDS}") - endif () +#============================================================================= +# Look for TBB's malloc package +set(TBB_MALLOC_LIBRARY_NAMES tbbmalloc) +get_debug_names(TBB_MALLOC_LIBRARY_NAMES) - if (TBB_DEBUG) - message("** FindTBB: - TBB_${_TBB_COMPONENT}_INCLUDE_DIRS = [${TBB_${_TBB_COMPONENT}_INCLUDE_DIRS}]") - message("** FindTBB: - TBB_${_TBB_COMPONENT}_LIBRARIES = [${TBB_${_TBB_COMPONENT}_LIBRARIES}]") - endif () +find_path(TBB_MALLOC_INCLUDE_DIR + NAMES tbb/tbb.h + PATHS ${TBB_INC_SEARCH_PATH}) - # Add to TBB_INCLUDE_DIRS and TBB_LIBRARIES - list(APPEND TBB_INCLUDE_DIRS ${TBB_${_TBB_COMPONENT}_INCLUDE_DIRS}) - list(APPEND TBB_LIBRARIES ${TBB_${_TBB_COMPONENT}_LIBRARIES}) - - # Add TBB:: import target - string(TOLOWER ${_TBB_COMPONENT} _TBB_TARGET_NAME) - set(_TBB_TARGET_NAME "TBB::${_TBB_TARGET_NAME}") - add_library(${_TBB_TARGET_NAME} SHARED IMPORTED) - - set_target_properties(${_TBB_TARGET_NAME} PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${TBB_${_TBB_COMPONENT}_INCLUDE_DIRS}" - IMPORTED_LINK_INTERFACE_LANGUAGES CXX - IMPORTED_NO_SONAME TRUE - ) - if (_TBB_${_TBB_COMPONENT}_LIB_LINK_DEPENDS) - set_target_properties(${_TBB_TARGET_NAME} PROPERTIES - INTERFACE_LINK_LIBRARIES "${_TBB_${_TBB_COMPONENT}_LIB_LINK_DEPENDS}" - ) - endif () +find_library(TBB_MALLOC_LIBRARY_RELEASE + NAMES ${TBB_MALLOC_LIBRARY_NAMES} + PATHS ${TBB_LIB_SEARCH_PATH}) +find_library(TBB_MALLOC_LIBRARY_DEBUG + NAMES ${TBB_MALLOC_LIBRARY_NAMES_DEBUG} + PATHS ${TBB_LIB_SEARCH_PATH}) +make_library_set(TBB_MALLOC_LIBRARY) - foreach (_TBB_CONFIGURATION IN ITEMS DEBUG RELEASE) - if (TBB_${_TBB_COMPONENT}_LIBRARY_${_TBB_CONFIGURATION}) - set_property(TARGET ${_TBB_TARGET_NAME} APPEND PROPERTY IMPORTED_CONFIGURATIONS ${_TBB_CONFIGURATION}) - if (WIN32) - set_target_properties(${_TBB_TARGET_NAME} PROPERTIES - IMPORTED_IMPLIB_${_TBB_CONFIGURATION} "${TBB_${_TBB_COMPONENT}_LIBRARY_${_TBB_CONFIGURATION}}" - ) - string(REPLACE "/lib/" "/bin/" _TBB_LIB_PATH_DLL "${TBB_${_TBB_COMPONENT}_LIBRARY_${_TBB_CONFIGURATION}}") - string(REGEX REPLACE "\\.lib$" ".dll" _TBB_LIB_PATH_DLL "${_TBB_LIB_PATH_DLL}") - if (EXISTS "${_TBB_LIB_PATH_DLL}") - set_target_properties(${_TBB_TARGET_NAME} PROPERTIES - IMPORTED_LOCATION_${_TBB_CONFIGURATION} "${_TBB_LIB_PATH_DLL}" - ) - if (TBB_DEBUG) - message("** FindTBB: - IMPORTED_LOCATION_${_TBB_CONFIGURATION} = ${_TBB_LIB_PATH_DLL}") - endif () - elseif (TBB_DEBUG) - message("** FindTBB: Could not determine ${_TBB_CONFIGURATION} DLL path from import library, tried: " - "\n\t${_TBB_LIB_PATH_DLL}") - endif () - else () - set_target_properties(${_TBB_TARGET_NAME} PROPERTIES - IMPORTED_LOCATION_${_TBB_CONFIGURATION} "${TBB_${_TBB_COMPONENT}_LIBRARY_${_TBB_CONFIGURATION}}" - ) - endif () - endif () - endforeach () +findpkg_finish(TBB_MALLOC tbbmalloc) - if (TBB_DEBUG) - message("** FindTBB: Looking for component ${_TBB_COMPONENT}... 
- found") - endif () +#============================================================================= +# Look for TBB's malloc proxy package +set(TBB_MALLOC_PROXY_LIBRARY_NAMES tbbmalloc_proxy) +get_debug_names(TBB_MALLOC_PROXY_LIBRARY_NAMES) - else () +find_path(TBB_MALLOC_PROXY_INCLUDE_DIR + NAMES tbb/tbbmalloc_proxy.h + PATHS ${TBB_INC_SEARCH_PATH}) - if (TBB_DEBUG) - message("** FindTBB: Looking for component ${_TBB_COMPONENT}... - not found") - endif () - unset(TBB_${_TBB_COMPONENT}_INCLUDE_DIRS) - unset(TBB_${_TBB_COMPONENT}_LIBRARIES) +find_library(TBB_MALLOC_PROXY_LIBRARY_RELEASE + NAMES ${TBB_MALLOC_PROXY_LIBRARY_NAMES} + PATHS ${TBB_LIB_SEARCH_PATH}) +find_library(TBB_MALLOC_PROXY_LIBRARY_DEBUG + NAMES ${TBB_MALLOC_PROXY_LIBRARY_NAMES_DEBUG} + PATHS ${TBB_LIB_SEARCH_PATH}) +make_library_set(TBB_MALLOC_PROXY_LIBRARY) - endif () -endforeach () +findpkg_finish(TBB_MALLOC_PROXY tbbmalloc_proxy) -if (TBB_INCLUDE_DIRS) - list(REMOVE_DUPLICATES TBB_INCLUDE_DIRS) -endif () -if (TBB_DEBUG) - message("** FindTBB: Include paths and libraries of all found components:") - message("** FindTBB: - TBB_INCLUDE_DIRS = [${TBB_INCLUDE_DIRS}]") - message("** FindTBB: - TBB_LIBRARIES = [${TBB_LIBRARIES}]") -endif () +#============================================================================= +#parse all the version numbers from tbb +if(NOT TBB_VERSION) -# ------------------------------------------------------------------------------ -# Extract library version from start of tbb_stddef.h -if (TBB_INCLUDE_DIR) - if (NOT DEFINED TBB_VERSION_MAJOR OR - NOT DEFINED TBB_VERSION_MINOR OR - NOT DEFINED TBB_INTERFACE_VERSION OR - NOT DEFINED TBB_COMPATIBLE_INTERFACE_VERSION) - file(READ "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h" _TBB_VERSION_CONTENTS LIMIT 2048) - string(REGEX REPLACE - ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1" - TBB_VERSION_MAJOR "${_TBB_VERSION_CONTENTS}" - ) - string(REGEX REPLACE - ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1" - TBB_VERSION_MINOR "${_TBB_VERSION_CONTENTS}" - ) - string(REGEX REPLACE - ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" - TBB_INTERFACE_VERSION "${_TBB_VERSION_CONTENTS}" - ) - string(REGEX REPLACE - ".*#define TBB_COMPATIBLE_INTERFACE_VERSION ([0-9]+).*" "\\1" - TBB_COMPATIBLE_INTERFACE_VERSION "${_TBB_VERSION_CONTENTS}" - ) - unset(_TBB_VERSION_CONTENTS) - endif () - set(TBB_VERSION "${TBB_VERSION_MAJOR}.${TBB_VERSION_MINOR}") - set(TBB_VERSION_STRING "${TBB_VERSION}") -else () - unset(TBB_VERSION) - unset(TBB_VERSION_MAJOR) - unset(TBB_VERSION_MINOR) - unset(TBB_VERSION_STRING) - unset(TBB_INTERFACE_VERSION) - unset(TBB_COMPATIBLE_INTERFACE_VERSION) -endif () + #only read the start of the file + file(STRINGS + "${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h" + TBB_VERSION_CONTENTS + REGEX "VERSION") -if (TBB_DEBUG) - message("** FindTBB: Version information from ${TBB_INCLUDE_DIR}/tbb/tbb_stddef.h") - message("** FindTBB: - TBB_VERSION_STRING = ${TBB_VERSION_STRING}") - message("** FindTBB: - TBB_VERSION_MAJOR = ${TBB_VERSION_MAJOR}") - message("** FindTBB: - TBB_VERSION_MINOR = ${TBB_VERSION_MINOR}") - message("** FindTBB: - TBB_INTERFACE_VERSION = ${TBB_INTERFACE_VERSION}") - message("** FindTBB: - TBB_COMPATIBLE_INTERFACE_VERSION = ${TBB_COMPATIBLE_INTERFACE_VERSION}") -endif () + string(REGEX REPLACE + ".*#define TBB_VERSION_MAJOR ([0-9]+).*" "\\1" + TBB_VERSION_MAJOR "${TBB_VERSION_CONTENTS}") -# ------------------------------------------------------------------------------ -# Handle QUIET, REQUIRED, and [EXACT] VERSION arguments and set TBB_FOUND 
-include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(TBB - REQUIRED_VARS TBB_INCLUDE_DIR - VERSION_VAR TBB_VERSION - HANDLE_COMPONENTS -) - -if (NOT TBB_FIND_QUIETLY) - if (TBB_FOUND) - message(STATUS "${_TBB_FIND_STATUS}... - found v${TBB_VERSION_STRING}") - else () - message(STATUS "${_TBB_FIND_STATUS}... - not found") - endif () -endif () + string(REGEX REPLACE + ".*#define TBB_VERSION_MINOR ([0-9]+).*" "\\1" + TBB_VERSION_MINOR "${TBB_VERSION_CONTENTS}") -# ------------------------------------------------------------------------------ -# Unset local auxiliary variables -foreach (_TBB_COMPONENT IN ITEMS TBB MALLOC MALLOC_PROXY) - unset(_TBB_FIND_REQUIRED_${_TBB_COMPONENT}) - unset(_TBB_${_TBB_COMPONENT}_LIB_NAMES_RELEASE) - unset(_TBB_${_TBB_COMPONENT}_LIB_NAMES_DEBUG) - unset(_TBB_${_TBB_COMPONENT}_LIB_LINK_DEPENDS) - unset(_TBB_${_TBB_COMPONENT}_INC_NAMES) - unset(_TBB_${_TBB_COMPONENT}_NAME) -endforeach () + string(REGEX REPLACE + ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" + TBB_INTERFACE_VERSION "${TBB_VERSION_CONTENTS}") + + string(REGEX REPLACE + ".*#define TBB_COMPATIBLE_INTERFACE_VERSION ([0-9]+).*" "\\1" + TBB_COMPATIBLE_INTERFACE_VERSION "${TBB_VERSION_CONTENTS}") -unset(_TBB_COMPONENT) -unset(_TBB_TARGET_NAME) -unset(_TBB_FIND_COMPONENTS) -unset(_TBB_FIND_STATUS) -unset(_TBB_INC_PATH_SUFFIXES) -unset(_TBB_LIB_PATH_SUFFIXES) -unset(_TBB_LIB_PATH_DLL) -unset(_TBB_LIB_NAME) -unset(_TBB_ARCH_PLATFORM) +endif() diff --git a/cmake/yacma/LICENSE b/cmake/yacma/LICENSE index 3203c9a..edd52cf 100644 --- a/cmake/yacma/LICENSE +++ b/cmake/yacma/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2016-2018 Francesco Biscani +Copyright (c) 2016-2020 Francesco Biscani Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/cmake/yacma/YACMACompilerLinkerSettings.cmake b/cmake/yacma/YACMACompilerLinkerSettings.cmake index 97659bf..7d7aa1b 100644 --- a/cmake/yacma/YACMACompilerLinkerSettings.cmake +++ b/cmake/yacma/YACMACompilerLinkerSettings.cmake @@ -27,15 +27,11 @@ if(CMAKE_COMPILER_IS_GNUCXX) set(YACMA_COMPILER_IS_GNUCXX TRUE) endif() -# Detect the hcc compiler. -if(YACMA_COMPILER_IS_CLANGXX AND "${CMAKE_CXX_COMPILER}" MATCHES "hcc") - set(YACMA_COMPILER_IS_HCC TRUE) -endif() - # This is an OS X specific setting that is suggested to be enabled. See: # https://blog.kitware.com/upcoming-in-cmake-2-8-12-osx-rpath-support/ # http://stackoverflow.com/questions/31561309/cmake-warnings-under-os-x-macosx-rpath-is-not-specified-for-the-following-targe -if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") +if(APPLE) + message(STATUS "OSX detected, setting the 'CMAKE_MACOSX_RPATH' option to TRUE.") set(CMAKE_MACOSX_RPATH TRUE) endif() @@ -48,36 +44,32 @@ endfunction() # Enable conditionally a CXX flag, if supported by the compiler. # This is for flags intended to be enabled in all configurations. -# NOTE: we use macros and go through temporary private variables -# because it's apparently impossible to append to an internal +# NOTE: we use macros because it's apparently impossible to append to an internal # CACHEd list. 
macro(_YACMA_CHECK_ENABLE_CXX_FLAG flag) set(CMAKE_REQUIRED_QUIET TRUE) - check_cxx_compiler_flag("${flag}" YACMA_CHECK_CXX_FLAG) + check_cxx_compiler_flag("${flag}" YACMA_CHECK_CXX_FLAG::${flag}) unset(CMAKE_REQUIRED_QUIET) - if(YACMA_CHECK_CXX_FLAG) + if(YACMA_CHECK_CXX_FLAG::${flag}) message(STATUS "'${flag}': flag is supported by the compiler, enabling.") list(APPEND _YACMA_CXX_FLAGS "${flag}") else() message(STATUS "'${flag}': flag is not supported by the compiler.") endif() - # NOTE: check_cxx_compiler stores variables in the cache. - unset(YACMA_CHECK_CXX_FLAG CACHE) endmacro() # Enable conditionally a debug CXX flag, is supported by the compiler. # This is for flags intended to be enabled in debug mode. macro(_YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG flag) set(CMAKE_REQUIRED_QUIET TRUE) - check_cxx_compiler_flag("${flag}" YACMA_CHECK_DEBUG_CXX_FLAG) + check_cxx_compiler_flag("${flag}" YACMA_CHECK_DEBUG_CXX_FLAG::${flag}) unset(CMAKE_REQUIRED_QUIET) - if(YACMA_CHECK_DEBUG_CXX_FLAG) + if(YACMA_CHECK_DEBUG_CXX_FLAG::${flag}) message(STATUS "'${flag}': debug flag is supported by the compiler, enabling.") list(APPEND _YACMA_CXX_FLAGS_DEBUG "${flag}") else() message(STATUS "'${flag}': debug flag is not supported by the compiler.") endif() - unset(YACMA_CHECK_DEBUG_CXX_FLAG CACHE) endmacro() # What we want to avoid is to re-run the expensive flag checks. We will set cache variables @@ -90,6 +82,8 @@ if(NOT _YACMACompilerLinkerSettingsRun) # Configuration bits specific for GCC. if(YACMA_COMPILER_IS_GNUCXX) _YACMA_CHECK_ENABLE_CXX_FLAG(-fdiagnostics-color=auto) + # New in GCC 9. + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Waddress-of-packed-member) endif() # Configuration bits specific for clang. @@ -98,7 +92,26 @@ if(NOT _YACMACompilerLinkerSettingsRun) # for the time being. _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wshadow) # Clang is better at this flag than GCC. - _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Werror) + # NOTE: enable unconditionally, as it seems like the CMake + # machinery for detecting this fails. Perhaps the source code + # used for checking the flag emits warnings? + list(APPEND _YACMA_CXX_FLAGS_DEBUG "-Werror") + # New warnings in clang 8. + # NOTE: a few issues with macros here, let's disable for now. + # _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wextra-semi-stmt) + # New warnings in clang 10. + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wtautological-overlap-compare) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wtautological-compare) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wtautological-bitwise-compare) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wbitwise-conditional-parentheses) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wrange-loop-analysis) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wmisleading-indentation) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wc99-designator) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wreorder-init-list) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wsizeof-pointer-div) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wsizeof-array-div) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wxor-used-as-pow) + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wfinal-dtor-non-final-class) endif() # Common configuration for GCC, clang and Intel. @@ -106,7 +119,8 @@ if(NOT _YACMACompilerLinkerSettingsRun) _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wall) _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wextra) _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wnon-virtual-dtor) - _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wnoexcept) + # NOTE: this flag is a bit too chatty, let's disable it for the moment. 
+ #_YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wnoexcept) _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wlogical-op) _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wconversion) _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wdeprecated) @@ -120,11 +134,7 @@ if(NOT _YACMACompilerLinkerSettingsRun) _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wdisabled-optimization) # This is useful when the compiler decides the template backtrace is too verbose. _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-ftemplate-backtrace-limit=0) - if(YACMA_COMPILER_IS_HCC) - message(STATUS "hcc compiler detected, the '-fstack-protector-all' flag will not be enabled.") - else() - _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-fstack-protector-all) - endif() + _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-fstack-protector-all) # A few suggestion flags. _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wsuggest-attribute=pure) _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(-Wsuggest-attribute=const) @@ -166,7 +176,7 @@ if(NOT _YACMACompilerLinkerSettingsRun) endif() # MSVC setup. - if(YACMA_COMPILER_IS_MSVC) + if(YACMA_COMPILER_IS_MSVC AND NOT YACMA_COMPILER_IS_CLANGXX) # Enable higher warning level than usual. _YACMA_CHECK_ENABLE_DEBUG_CXX_FLAG(/W4) # Treat warnings as errors. diff --git a/cmake/yacma/YACMAPythonSetup.cmake b/cmake/yacma/YACMAPythonSetup.cmake index 126fd1f..2d31ceb 100644 --- a/cmake/yacma/YACMAPythonSetup.cmake +++ b/cmake/yacma/YACMAPythonSetup.cmake @@ -3,14 +3,8 @@ if(YACMAPythonSetupIncluded) endif() # NOTE: this is a heuristic to determine whether we need to link to the Python library. -# In theory, Python extensions don't need to, as they are dlopened() by the Python process -# and thus they don't need to be linked to the Python library at compile time. However, -# the dependency on Boost.Python muddies the waters, as BP itself does link to the Python -# library, at least on some platforms. The following configuration seems to be working fine -# on various CI setups. -# NOTE: apparently homebrew requires NOT to link to the Python library. We might want -# to add a config option to accommodate that eventually. -if(WIN32 OR ${CMAKE_SYSTEM_NAME} MATCHES "Darwin") +# The linking seems to be necessary only on Windows. +if(WIN32) message(STATUS "Python modules require linking to the Python library.") set(_YACMA_PYTHON_MODULE_NEED_LINK TRUE) else() @@ -42,6 +36,11 @@ else() endif() mark_as_advanced(YACMA_PYTHON_INCLUDE_DIR) +# Add an interface imported target for the +# Python include dir. +add_library(YACMA::PythonIncludeDir INTERFACE IMPORTED) +set_target_properties(YACMA::PythonIncludeDir PROPERTIES INTERFACE_INCLUDE_DIRECTORIES ${YACMA_PYTHON_INCLUDE_DIR}) + message(STATUS "Python interpreter: ${PYTHON_EXECUTABLE}") message(STATUS "Python interpreter version: ${PYTHON_VERSION_STRING}") if(_YACMA_PYTHON_MODULE_NEED_LINK) @@ -49,16 +48,6 @@ if(_YACMA_PYTHON_MODULE_NEED_LINK) endif() message(STATUS "Python include dir: ${YACMA_PYTHON_INCLUDE_DIR}") -# An imported target to be used when building extension modules. -if(_YACMA_PYTHON_MODULE_NEED_LINK) - add_library(YACMA::PythonModule UNKNOWN IMPORTED) - set_target_properties(YACMA::PythonModule PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${YACMA_PYTHON_INCLUDE_DIR}" - IMPORTED_LOCATION "${PYTHON_LIBRARIES}" IMPORTED_LINK_INTERFACE_LANGUAGES "C") -else() - add_library(YACMA::PythonModule INTERFACE IMPORTED) - set_target_properties(YACMA::PythonModule PROPERTIES INTERFACE_INCLUDE_DIRECTORIES "${YACMA_PYTHON_INCLUDE_DIR}") -endif() - # This flag is used to signal the need to override the default extension of the Python modules # depending on the architecture. 
Under Windows, for instance, CMake produces shared objects as # .dll files, but Python from 2.5 onwards requires .pyd files (hence the need to override). @@ -66,7 +55,7 @@ set(_YACMA_PY_MODULE_EXTENSION "") # Platform-specific setup. if(UNIX) - if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin") + if(APPLE) message(STATUS "OS X platform detected.") # Apparently on OS X Python expects the .so extension for compiled modules. message(STATUS "Output extension for compiled modules will be '.so'.") @@ -125,11 +114,9 @@ function(YACMA_PYTHON_MODULE name) # with clang and gcc. See: # https://bugs.python.org/issue11149 # http://www.python.org/dev/peps/pep-3123/ - # NOTE: not sure here how we should set flags up for MSVC or clang on windows, need - # to check in the future. # NOTE: do not use the yacma compiler linker settings bits, so this module # can be used stand-alone. - if(CMAKE_COMPILER_IS_GNUCXX OR ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") + if(CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang" AND NOT MSVC)) message(STATUS "Setting up extra compiler flag '-fwrapv' for the Python module '${name}'.") target_compile_options(${name} PRIVATE "-fwrapv") if(${PYTHON_VERSION_MAJOR} LESS 3) @@ -137,7 +124,22 @@ function(YACMA_PYTHON_MODULE name) target_compile_options(${name} PRIVATE "-fno-strict-aliasing") endif() endif() - target_link_libraries("${name}" PRIVATE YACMA::PythonModule) + if(APPLE AND ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") + # On OSX + Clang this link flag is apparently necessary in order to avoid + # undefined references to symbols defined in the Python library. See also: + # https://github.com/potassco/clingo/issues/79 + # https://stackoverflow.com/questions/25421479/clang-and-undefined-symbols-when-building-a-library + # https://cmake.org/pipermail/cmake/2017-March/065115.html + set_target_properties(${name} PROPERTIES LINK_FLAGS "-undefined dynamic_lookup") + endif() + + # Add the Python include dirs. + target_include_directories("${name}" SYSTEM PRIVATE ${YACMA_PYTHON_INCLUDE_DIR}) + + # Link to the Python libs, if necessary. + if(_YACMA_PYTHON_MODULE_NEED_LINK) + target_link_libraries("${name}" PRIVATE ${PYTHON_LIBRARIES}) + endif() endfunction() # Mark as included. diff --git a/cmake/yacma/YACMAThreadingSetup.cmake b/cmake/yacma/YACMAThreadingSetup.cmake index c9e24f9..011e512 100644 --- a/cmake/yacma/YACMAThreadingSetup.cmake +++ b/cmake/yacma/YACMAThreadingSetup.cmake @@ -40,11 +40,6 @@ if(MINGW) list(APPEND YACMA_THREADING_CXX_FLAGS "-mthreads") endif() -if(YACMA_COMPILER_IS_MSVC) - message(STATUS "Enabling the '_MT' definition for MSVC.") - list(APPEND YACMA_THREADING_CXX_FLAGS "/D_MT") -endif() - # Check if we have thread_local. # NOTE: we need to double check what happens with OSX's clang here. list(FIND CMAKE_CXX_COMPILE_FEATURES "cxx_thread_local" YACMA_HAVE_THREAD_LOCAL) diff --git a/include/rakau/detail/di_aligned_allocator.hpp b/include/rakau/detail/di_aligned_allocator.hpp index d8e86f5..773d9d6 100644 --- a/include/rakau/detail/di_aligned_allocator.hpp +++ b/include/rakau/detail/di_aligned_allocator.hpp @@ -11,6 +11,7 @@ #include #include +#include #include #include #include @@ -47,11 +48,20 @@ struct di_aligned_allocator { // https://en.cppreference.com/w/cpp/memory/allocator using is_always_equal = std::true_type; using propagate_on_container_move_assignment = std::true_type; + // Max number of allocatable objects. + size_type max_size() const + { + // For allocating N value_types, we might need up to + // N*sizeof(value_type) + Alignment - 1 bytes. 
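As an aside on the aligned_alloc() branch appearing further down in this hunk: std::aligned_alloc() expects the requested size to be an integral multiple of the alignment, whereas posix_memalign() has no such constraint, hence the size rounding introduced there. A minimal standalone sketch of that rounding (round_up_to_alignment and alloc_doubles_aligned are hypothetical helper names, not part of the library, and a 64-byte alignment is assumed purely for illustration):

#include <cstddef>
#include <cstdlib>

// Round sz up to the next multiple of align (align must be nonzero).
// NOTE: the caller must guarantee the addition cannot overflow, which
// is the role played by max_size() in the allocator.
inline std::size_t round_up_to_alignment(std::size_t sz, std::size_t align)
{
    const auto rem = sz % align;
    return rem ? sz + (align - rem) : sz;
}

// Allocate storage for n doubles with 64-byte alignment.
// The returned pointer must be released with std::free().
inline void *alloc_doubles_aligned(std::size_t n)
{
    constexpr std::size_t align = 64;
    return std::aligned_alloc(align, round_up_to_alignment(n * sizeof(double), align));
}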
This number + // must be representable by size_type. + // NOTE: drop the -1 from the computation to simplify. + return (std::numeric_limits::max() - Alignment) / sizeof(value_type); + } // Allocation. T *allocate(size_type n) const { // Total size in bytes. This is prevented from being too large - // by the default implementation of max_size(). + // by max_size(). const auto size = n * sizeof(T); void *retval; if (Alignment == 0u) { @@ -64,6 +74,8 @@ struct di_aligned_allocator { // we will set retval to nullptr to signal that the allocation failed // (so that we can handle the allocation failure in the same codepath // as aligned_alloc()). + // NOTE: unlike aligned_alloc(), posix_memalign() does not have any + // constraint on the allocation size. if (::posix_memalign(&retval, Alignment, size)) { retval = nullptr; } @@ -75,7 +87,13 @@ struct di_aligned_allocator { // NOTE: some early versions of GCC put aligned_alloc in the root namespace rather // than std, so let's try to workaround. using namespace std; - retval = aligned_alloc(Alignment, size); + + const auto rem = size % Alignment; + if (rem) { + retval = aligned_alloc(Alignment, size + (Alignment - rem)); + } else { + retval = aligned_alloc(Alignment, size); + } #endif } if (!retval) { diff --git a/include/rakau/detail/tree_coll.hpp b/include/rakau/detail/tree_coll.hpp new file mode 100644 index 0000000..efe3aad --- /dev/null +++ b/include/rakau/detail/tree_coll.hpp @@ -0,0 +1,605 @@ +// Copyright 2018 Francesco Biscani (bluescarni@gmail.com) +// +// This file is part of the rakau library. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef RAKAU_DETAIL_TREE_COLL_HPP +#define RAKAU_DETAIL_TREE_COLL_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include + +namespace rakau +{ + +inline namespace detail +{ + +// Return the largest node level, in a domain of size box_size, +// such that the node size is larger than the value s. If s >= box_size, +// 0 will be returned. +template +inline UInt tree_coll_size_to_level(F s, F box_size) +{ + assert(std::isfinite(s)); + // NOTE: we assume we never enter here + // with a null node size. + assert(s > F(0)); + assert(std::isfinite(box_size)); + + constexpr auto cbits = cbits_v; + + UInt retval = 0; + auto cur_node_size = box_size; + + // Don't return a result larger than cbits + // (the max level). + for (; retval != cbits; ++retval) { + const auto next_node_size = cur_node_size * (F(1) / F(2)); + if (next_node_size <= s) { + // The next node size is not larger + // than the target. Break out and + // return the current retval; + break; + } + cur_node_size = next_node_size; + } + + return retval; +} + +// Compute the codes of the vertices of an AABB of size aabb_size around +// the point p_pos. The coordinates of the AABB vertices will be clamped +// to the domain boundaries (via inv_box_size). +template +inline auto +#if defined(__GNUC__) + // NOTE: unsure why, but disabling inlining + // on this function is a big performance win on some + // test cases. Need to investigate. + __attribute__((noinline)) +#endif + tree_coll_get_aabb_codes(const std::array &p_pos, F aabb_size, F inv_box_size) +{ + // p_pos must contain finite values. 
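For a concrete feel of the level selection performed by tree_coll_size_to_level() above, here is a standalone re-implementation of the same loop with hard-coded types; the choice of std::uint64_t codes and cbits = 21 (the 3D, 64-bit case) is an assumption made purely for illustration:

#include <cassert>
#include <cstdint>

inline std::uint64_t size_to_level(double s, double box_size)
{
    constexpr std::uint64_t cbits = 21;
    std::uint64_t retval = 0;
    auto cur_node_size = box_size;
    for (; retval != cbits; ++retval) {
        const auto next_node_size = cur_node_size / 2;
        if (next_node_size <= s) {
            break;
        }
        cur_node_size = next_node_size;
    }
    return retval;
}

int main()
{
    // In a unit box, an AABB of size 0.3 fits in a level-1 node (size 0.5)
    // but not in a level-2 node (size 0.25).
    assert(size_to_level(0.3, 1.) == 1u);
    // An AABB at least as large as the box maps to the root level.
    assert(size_to_level(2., 1.) == 0u);
    // Tiny AABBs are capped at cbits.
    assert(size_to_level(1e-30, 1.) == 21u);
}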
+ assert(std::all_of(p_pos.begin(), p_pos.end(), [](F x) { return std::isfinite(x); })); + // aabb_size finite and non-negative. + // NOTE: empty AABBs are allowed. + assert(std::isfinite(aabb_size) && aabb_size >= F(0)); + + // The number of vertices of the AABB is 2**NDim. + static_assert(NDim < unsigned(std::numeric_limits::digits), "Overflow error."); + constexpr auto n_points = std::size_t(1) << NDim; + + // Compute the min/max coordinates of the AABB (in 2D, the lower-left + // and upper-right corners of the AABB). + const auto aabb_minmax_coords = [half = aabb_size * (F(1) / F(2)), &p_pos]() { + std::array, 2> retval; + + for (std::size_t i = 0; i < NDim; ++i) { + const auto min_c = p_pos[i] - half; + const auto max_c = p_pos[i] + half; + + // Check them. + if (rakau_unlikely(!std::isfinite(min_c) || !std::isfinite(max_c) || min_c > max_c)) { + throw std::invalid_argument( + "The computation of the min/max coordinates of an AABB produced the invalid pair of values (" + + std::to_string(min_c) + ", " + std::to_string(max_c) + ")"); + } + + retval[0][i] = min_c; + retval[1][i] = max_c; + } + + return retval; + }(); + + // The return value. + std::array aabb_codes; + + // Fill in aabb_codes. The idea here is that + // we need to generate all the 2**NDim possible combinations of min/max + // aabb coordinates. We do it via the bit-level representation + // of the numbers from 0 to 2**NDim - 1u. For instance, in 3 dimensions, + // we have the numbers from 0 to 7 included: + // + // 0 0 0 | i = 0 + // 0 0 1 | i = 1 + // 0 1 0 | i = 2 + // 0 1 1 | i = 3 + // ... + // 1 1 1 | i = 7 + // + // We interpret a zero bit as setting the min aabb coordinate, + // a one bit as setting the max aabb coordinate. So, for instance, + // i = 3 corresponds to the aabb point (min, max, max). + std::array tmp_disc; + morton_encoder me; + for (std::size_t i = 0; i < n_points; ++i) { + for (std::size_t j = 0; j < NDim; ++j) { + const auto idx = (i >> j) & 1u; + // NOTE: discretize with clamping. + tmp_disc[j] = disc_single_coord(aabb_minmax_coords[idx][j], inv_box_size); + } + aabb_codes[i] = me(tmp_disc.data()); + } + + return aabb_codes; +} + +// Given a nodal code and its level, determine the +// closed range of [min, max] highest level nodal codes +// (that is, positional codes with an extra 1 bit on top) +// belonging to the node. +template +inline std::pair tree_coll_node_range(UInt code, UInt level) +{ + constexpr auto cbits = cbits_v; + + assert(tree_level(code) == level); + assert(cbits >= level); + + const auto shift_amount = (cbits - level) * NDim; + + // Move up, right filling zeroes. + const UInt min = code << shift_amount; + // Turn the filling zeroes into ones. + const UInt max = min + (UInt(-1) >> (unsigned(std::numeric_limits::digits) - shift_amount)); + + return std::make_pair(min, max); +} + +} // namespace detail + +// Return a vector of indices into the tree structure +// representing the ordered set of leaf nodes +// NOTE: need to understand if/when/how this should be parallelised. +template +inline auto tree::coll_leaves_permutation() const +{ + // Prepare the output. + std::vector retval; + const auto tsize = m_tree.size(); + retval.reserve(static_cast(tsize)); + + for (size_type i = 0; i < tsize; ++i) { + if (!m_tree[i].n_children) { + // Leaf node, add it. + retval.push_back(i); + } + } + +#if !defined(NDEBUG) + // Verify the output. 
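The bit trick described above for enumerating the 2**NDim AABB vertices can be shown in isolation. The following 2D sketch (made-up coordinates, not library code) produces the four corners by selecting, per dimension, the min or max coordinate according to the bits of i:

#include <array>
#include <cstddef>
#include <iostream>

int main()
{
    constexpr std::size_t NDim = 2;
    // Per-dimension min (index 0) and max (index 1) coordinates of an AABB.
    const std::array<std::array<double, NDim>, 2> minmax{{{0., 0.}, {1., 2.}}};

    for (std::size_t i = 0; i < (std::size_t(1) << NDim); ++i) {
        std::array<double, NDim> vertex;
        for (std::size_t j = 0; j < NDim; ++j) {
            // Bit j of i selects min (0) or max (1) along dimension j.
            vertex[j] = minmax[(i >> j) & 1u][j];
        }
        // Prints (0, 0), (1, 0), (0, 2), (1, 2).
        std::cout << '(' << vertex[0] << ", " << vertex[1] << ")\n";
    }
}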
+ size_type tot_parts = 0; + for (auto idx : retval) { + assert(m_tree[idx].n_children == 0u); + tot_parts += m_tree[idx].end - m_tree[idx].begin; + } + assert(tot_parts == m_parts[0].size()); +#endif + + return retval; +} + +template +template +inline void tree::compute_cgraph_impl(std::vector> &cgraph, + It it) const +{ + simple_timer st("overall cgraph computation"); + + // Check that we can index into It at least + // up to the number of particles. + detail::it_diff_check(m_parts[0].size()); + + // The vector for iterating over the leaf nodes, and the corresponding + // permutation iterators. + decltype(coll_leaves_permutation()) clp; + decltype(boost::make_permutation_iterator(m_tree.begin(), clp.begin())) c_begin, c_end; + + // The vector of additional particles for each leaf node. + std::vector> v_add; + + // Prepare storage for cgraph in parallel + // with the v_add computation. + tbb::parallel_invoke( + [this, &cgraph]() { + simple_timer st("cgraph prepare"); + + // Check if the return value is empty. + const auto was_empty = cgraph.empty(); + + cgraph.resize(boost::numeric_cast(m_parts[0].size())); + + if (!was_empty) { + // If the return value was not originally empty, + // we must make sure that all its vectors are cleared + // up before we write into them. + tbb::parallel_for(tbb::blocked_range(cgraph.begin(), cgraph.end()), [](const auto &r) { + for (auto &v : r) { + v.clear(); + } + }); + } + }, + [this, &clp, &v_add, it, &c_begin, &c_end]() { + { + simple_timer st("leaves permutation"); + clp = coll_leaves_permutation(); + } + + { + simple_timer st("v_add prepare"); + v_add.resize(boost::numeric_cast(clp.size())); + } + + simple_timer st("v_add computation"); + + // Create the iterators for accessing the leaf nodes. + // NOTE: make sure we don't overflow when indexing. + detail::it_diff_check(m_tree.size()); + c_begin = boost::make_permutation_iterator(m_tree.begin(), clp.begin()); + c_end = boost::make_permutation_iterator(m_tree.end(), clp.end()); + + // Pre-compute the inverse of the domain size. + const auto inv_box_size = F(1) / m_box_size; + + // Determine the additional particles for each node. + tbb::parallel_for(tbb::blocked_range(c_begin, c_end), [this, &v_add, inv_box_size, it, c_begin, + c_end](const auto &r) { + // Temporary vector for the particle position. + std::array p_pos; + + // Iteration over the leaf nodes. + for (const auto &lnode : r) { + // Fetch/cache some properties of the leaf node. + const auto lcode = lnode.code; + const auto llevel = lnode.level; + + // Iterate over the particles of the leaf node. + const auto e = lnode.end; + for (auto pidx = lnode.begin; pidx != e; ++pidx) { + // Load the particle's AABB size. + // NOTE: if Ordered, we must transform the original + // pidx, as 'it' points to a vector of aabb sizes sorted + // in the original order. + const auto aabb_size = Ordered ? *(it + m_perm[pidx]) : *(it + pidx); + + // Check it. + if (rakau_unlikely(!std::isfinite(aabb_size) || aabb_size < F(0))) { + throw std::invalid_argument( + "An invalid AABB size was detected while computing the collision " + "graph: the AABB size must be finite and non-negative, but it is " + + std::to_string(aabb_size) + " instead"); + } + + // Particles with a null AABB don't participate in collision + // detection and cannot straddle into other nodes. + if (aabb_size == F(0)) { + continue; + } + + // Fill in the particle position. 
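The c_begin/c_end iterators set up above use Boost's permutation_iterator to visit only the leaf nodes of m_tree through the index vector clp. A minimal standalone illustration of the adaptor, with made-up data:

#include <cstddef>
#include <iostream>
#include <vector>

#include <boost/iterator/permutation_iterator.hpp>

int main()
{
    const std::vector<double> data{10., 20., 30., 40.};
    // Visit data[3] first, then data[1].
    const std::vector<std::size_t> idx{3, 1};

    auto b = boost::make_permutation_iterator(data.begin(), idx.begin());
    auto e = boost::make_permutation_iterator(data.begin(), idx.end());

    for (; b != e; ++b) {
        std::cout << *b << '\n'; // Prints 40, then 20.
    }
}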
+ for (std::size_t j = 0; j < NDim; ++j) { + p_pos[j] = m_parts[j][pidx]; + } + + // Compute the clamped codes of the AABB vertices. + const auto aabb_codes = detail::tree_coll_get_aabb_codes(p_pos, aabb_size, inv_box_size); + + // Fetch the number of vertices. + constexpr auto n_vertices = std::tuple_size_v>; + + // Check if the particle straddles. + // NOTE: the idea here is that the codes for all positions in the current node share + // the same initial llevel*NDim digits, which we can determine from the particle code. + const auto straddles = [llevel, &aabb_codes, pcode = m_codes[pidx]]() { + const auto shift_amount = (cbits - llevel) * NDim; + const auto common_prefix = pcode >> shift_amount; + + for (std::size_t i = 0; i < n_vertices; ++i) { + if (aabb_codes[i] >> shift_amount != common_prefix) { + return true; + } + } + + return false; + }(); + + if (!straddles) { + // The particle does not straddle, move on. + continue; + } + + // The particle straddles. To know into which leaf nodes it straddles, + // the first step is to construct a set of adjacent nodes which are guaranteed + // to contain the AABB of pidx. We can do that by establishing for each AABB + // vertex the smallest node of size greater than aabb_size that contains + // the vertex. The set of equal-sized nodes thus determined will completely + // enclose the AABB of pidx. We will then figure out which leaf nodes + // overlap the enclosing node set. + + // Convert the AABB size to a node level. + const auto aabb_level = detail::tree_coll_size_to_level(aabb_size, m_box_size); + + // Iterate over the AABB vertices. + for (std::size_t i = 0; i < n_vertices; ++i) { + // NOTE: within this loop, we will be determining + // a node N at level aabb_level which contains a straddling + // vertex of pidx's AABB. We will then establish a range of leaf nodes + // [l_begin, l_end) that have an overlap with N. + // + // It might be that [l_begin, l_end) turns out to + // be an empty range because the particle straddles into + // an 'empty' area which is not associated to any leaf node (because + // it does not contain the centre of any particle). In such a situation, + // we will end up *not* detecting AABB collisions occurring + // in the empty area. In order to avoid this, if [l_begin, l_end) + // is an empty range we will lower the aabb_level (that is, move + // to the parent node) and repeat the search, until it yields + // a non-empty [l_begin, l_end) range. That is, we are "zooming + // out" until the empty area becomes part of some node N_bigger, which + // is guaranteed to eventually happen, and we will add pidx to + // all leaf nodes overlapping N_bigger. + + // Init the range iterators. + decltype(c_begin) l_begin, l_end; + + // In the loop below we may end up determining a node + // which fits wholly in the original leaf node. In such a case, + // we will want to skip the rest of the loop and move to the + // next vertex, because there's no need to add pidx as an + // additional particle to its original node. + bool fits = false; + + for (auto al_copy(aabb_level);; --al_copy) { + // Compute the code of the node at level al_copy + // that encloses the vertex. + const auto s_code + = (aabb_codes[i] >> ((cbits - al_copy) * NDim)) + (UInt(1) << (al_copy * NDim)); + + if (al_copy >= llevel && s_code >> ((al_copy - llevel) * NDim) == lcode) { + // The s_code node fits within the current leaf node, + // no need for further checks (i.e., s_code does not overlap + // with any leaf node other than the original one). 
+ fits = true; + break; + } + + // Determine the code range encompassed by s_code. + const auto s_code_range = detail::tree_coll_node_range(s_code, al_copy); + + // In the set of leaf nodes, determine either the last node whose + // code range precedes s_code_range, or the first one whose code + // range has some overlap with s_code range. + l_begin + = std::lower_bound(c_begin, c_end, s_code_range, [](const auto &n, const auto &p) { + const auto n_max = detail::tree_coll_node_range(n.code, n.level).second; + return n_max < p.first; + }); + + // Now determine the first node whose code range follows s_code_range. + // NOTE: in upper_bound(), the comparator parameter types are flipped around + // wrt lower_bound(). + // NOTE: start the search from l_begin, determined above, as we know + // that l_end must be l_begin or an iterator following it. + l_end + = std::upper_bound(l_begin, c_end, s_code_range, [](const auto &p, const auto &n) { + const auto n_min = detail::tree_coll_node_range(n.code, n.level).first; + return p.second < n_min; + }); + + if (l_begin != l_end) { + // A non-empty leaf node range was found, break out + // and add pidx to all the leaf nodes in the range. + break; + } + + // An empty leaf node range was found. Enlarge the search. + // NOTE: the current al_copy level cannot be zero, because in that case we + // would be at the root node and we would've found something already. + assert(al_copy > 0u); + } + + if (fits) { + // The vertex-enclosing node fits wholly in the original leaf node. + // Move to the next vertex. + continue; + } + + // Iterate over the leaf node range, and add pidx as extra particle + // to each leaf node. + for (; l_begin != l_end; ++l_begin) { + // NOTE: don't add pidx as an extra particle + // to its original node. + if (l_begin->code != lcode) { + // NOTE: we checked earlier that c_begin's + // diff type can represent m_tree.size() and, + // by extension, v_add.size(). + v_add[static_cast(l_begin - c_begin)].push_back(pidx); + } + } + } + } + } + }); + }); + +#if !defined(NDEBUG) + // Verify v_add. + tbb::parallel_for(tbb::blocked_range(decltype(v_add.size())(0), v_add.size()), [this, &clp, &v_add](const auto &r) { + for (auto i = r.begin(); i != r.end(); ++i) { + const auto &v = v_add[i]; + for (auto idx : v) { + // The indices of the extra particles cannot + // be referring to particles already in the node. + assert(idx < m_tree[clp[i]].begin || idx >= m_tree[clp[i]].end); + } + } + }); +#endif + + // Build the collision graph. + tbb::parallel_for(tbb::blocked_range(c_begin, c_end), [this, &v_add, &cgraph, c_begin, it](const auto &r) { + // A local vector to store all the particle indices + // of a node, including the extra particles. + std::vector node_indices; + + // Vectors to hold the min/max aabb coordinates + // for a particle. + std::array min_aabb, max_aabb; + + // Iteration over the leaf nodes. + for (auto l_it = r.begin(); l_it != r.end(); ++l_it) { + // Fetch the leaf node and its vector of extra particles. + const auto &lnode = *l_it; + auto &va = v_add[static_cast(l_it - c_begin)]; + + // Write into node_indices all the particle indices belonging to the node: its original + // particles, plus the additional ones. + node_indices.resize(boost::numeric_cast(lnode.end - lnode.begin)); + std::iota(node_indices.begin(), node_indices.end(), lnode.begin); + // NOTE: the vector of extra particles might contain + // duplicates: sort it, apply std::unique() and insert + // only the non-duplicate values. 
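The deduplication mentioned above is the usual sort + std::unique() idiom; for reference, in isolation:

#include <algorithm>
#include <cassert>
#include <vector>

int main()
{
    std::vector<unsigned> v{4, 1, 4, 3, 1};
    std::sort(v.begin(), v.end());
    v.erase(std::unique(v.begin(), v.end()), v.end());
    assert((v == std::vector<unsigned>{1, 3, 4}));
}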
+ std::sort(va.begin(), va.end()); + const auto new_va_end = std::unique(va.begin(), va.end()); + node_indices.insert(node_indices.end(), va.begin(), new_va_end); + // We are done with va. Clear it and free any allocated + // storage. Like this, we are doing most of the work + // of the destructor of v_add in parallel. + va.clear(); + va.shrink_to_fit(); + + // Run the N**2 AABB overlap detection on all particles + // in node_indices. + const auto tot_n = node_indices.size(); + for (decltype(node_indices.size()) i = 0; i < tot_n; ++i) { + // Load the index of the first particle. + const auto idx1 = node_indices[i]; + + // Load the AABB size for particle idx1. + // NOTE: we checked while building v_add that all + // AABB sizes are finite and non-negative. + const auto aabb_size1 = Ordered ? *(it + m_perm[idx1]) : *(it + idx1); + + if (aabb_size1 == F(0)) { + // Skip collision detection for this particle + // if the AABB is null. + continue; + } + + // Determine the min/max aabb coordinates for the particle idx1. + for (std::size_t k = 0; k < NDim; ++k) { + min_aabb[k] = detail::fma_wrap(aabb_size1, -F(1) / F(2), m_parts[k][idx1]); + max_aabb[k] = detail::fma_wrap(aabb_size1, F(1) / F(2), m_parts[k][idx1]); + } + + for (auto j = i + 1u; j < tot_n; ++j) { + // Load the index of the second particle. + const auto idx2 = node_indices[j]; + + assert(idx1 != idx2); + + // Load the AABB size for particle idx2. + const auto aabb_size2 = Ordered ? *(it + m_perm[idx2]) : *(it + idx2); + + if (aabb_size2 == F(0)) { + // Skip collision detection for this particle + // if the AABB is null. + continue; + } + + // Check for AABB overlap. + bool overlap = true; + for (std::size_t k = 0; k < NDim; ++k) { + const auto min2 = detail::fma_wrap(aabb_size2, -F(1) / F(2), m_parts[k][idx2]); + const auto max2 = detail::fma_wrap(aabb_size2, F(1) / F(2), m_parts[k][idx2]); + + // NOTE: we regard the AABBs as closed ranges, that is, + // a single point in common in two segments is enough + // to trigger an overlap condition. Thus, test with + // >= and <=. + if (!(max_aabb[k] >= min2 && min_aabb[k] <= max2)) { + overlap = false; + break; + } + } + + if (overlap) { + // Overlap detected, record it. + if constexpr (Ordered) { + cgraph[m_perm[idx1]].push_back(m_perm[idx2]); + cgraph[m_perm[idx2]].push_back(m_perm[idx1]); + } else { + cgraph[idx1].push_back(idx2); + cgraph[idx2].push_back(idx1); + } + } + } + } + } + }); + + // Last step: sort+duplicate removal for the + // vectors in cgraph. + { + simple_timer st_cd("cgraph deduplication"); + tbb::parallel_for(tbb::blocked_range(cgraph.begin(), cgraph.end()), [](const auto &r) { + for (auto &v : r) { + detail::it_diff_check(v.size()); + + std::sort(v.begin(), v.end()); + const auto new_end = std::unique(v.begin(), v.end()); + v.resize(static_cast(new_end - v.begin())); + } + }); + } + +#if !defined(NDEBUG) + // Verify the collision graph. 
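The overlap predicate used above treats the AABBs as closed intervals, so that touching boundaries count as a collision. A standalone sketch of the same per-dimension test (plain arithmetic in place of fma_wrap, which is enough for illustration):

#include <array>
#include <cassert>
#include <cstddef>

template <std::size_t NDim>
bool aabb_overlap(const std::array<double, NDim> &c1, double size1,
                  const std::array<double, NDim> &c2, double size2)
{
    for (std::size_t k = 0; k < NDim; ++k) {
        const auto min1 = c1[k] - size1 / 2, max1 = c1[k] + size1 / 2;
        const auto min2 = c2[k] - size2 / 2, max2 = c2[k] + size2 / 2;
        // Closed ranges: a shared boundary point counts as an overlap.
        if (!(max1 >= min2 && min1 <= max2)) {
            return false;
        }
    }
    return true;
}

int main()
{
    assert((aabb_overlap<2>({0., 0.}, 1., {1., 0.}, 1.)));   // Touching edges.
    assert((!aabb_overlap<2>({0., 0.}, 1., {2.1, 0.}, 1.))); // Disjoint.
}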
+ tbb::parallel_for(tbb::blocked_range(decltype(cgraph.size())(0), cgraph.size()), [&cgraph](const auto &r) { + for (auto i = r.begin(); i != r.end(); ++i) { + for (auto idx : cgraph[i]) { + assert(idx < cgraph.size()); + assert(std::is_sorted(cgraph[idx].begin(), cgraph[idx].end())); + const auto new_end = std::unique(cgraph[idx].begin(), cgraph[idx].end()); + assert(new_end == cgraph[idx].end()); + const auto lb_it = std::lower_bound(cgraph[idx].begin(), cgraph[idx].end(), i); + assert(lb_it != cgraph[idx].end()); + assert(*lb_it == i); + } + } + }); +#endif +} + +} // namespace rakau + +#endif diff --git a/include/rakau/tree.hpp b/include/rakau/tree.hpp index c13776a..51d8d74 100644 --- a/include/rakau/tree.hpp +++ b/include/rakau/tree.hpp @@ -14,9 +14,6 @@ #include #include #include -#if defined(RAKAU_WITH_TIMER) -#include -#endif #include #include #include @@ -36,6 +33,10 @@ #include #include +#if defined(RAKAU_WITH_TIMER) +#include +#endif + #include #include #include @@ -371,15 +372,26 @@ struct morton_decoder<2, std::uint32_t> { }; // Discretize a coordinate in a square domain of size 1/inv_box_size. -template +// If Clamp is true, then the coordinate will be clamped within +// the domain. +// NOTE: for the eventual vectorisation of this function, it looks +// unlikely we can do it on AVX/AVX2 due to the lack of functions +// manipulating/converting 64bit integers. AVX-512 looks much more +// promising in this sense. +template inline UInt disc_single_coord(const F &x, const F &inv_box_size) { + // NOTE: this factor is the total number of available discretised + // positions across a single dimension. constexpr UInt factor = UInt(1) << cbits_v; // Translate and rescale the coordinate so that -box_size/2 becomes zero // and box_size/2 becomes 1. auto tmp = fma_wrap(x, inv_box_size, F(1) / F(2)); // Rescale by factor. + // NOTE: in theory we could avoid this extra multiplication + // by having the user pass in factor * inv_box_size, rather + // than just inv_box_size. tmp *= F(factor); // Check: don't end up with a nonfinite value. @@ -388,12 +400,18 @@ inline UInt disc_single_coord(const F &x, const F &inv_box_size) + " in a box of size " + std::to_string(F(1) / inv_box_size) + ", the non-finite value " + std::to_string(tmp) + " was generated"); } - // Check: don't end up outside the [0, factor) range. - if (rakau_unlikely(tmp < F(0) || tmp >= F(factor))) { - throw std::invalid_argument("The discretisation of the input coordinate " + std::to_string(x) - + " in a box of size " + std::to_string(F(1) / inv_box_size) - + " produced the floating-point value " + std::to_string(tmp) - + ", which is outside the allowed bounds"); + + if constexpr (Clamp) { + // If requested, clamp the coordinate to [0, factor). + tmp = std::clamp(tmp, F(0), std::nextafter(F(factor), F(-1))); + } else { + // Check: don't end up outside the [0, factor) range. + if (rakau_unlikely(tmp < F(0) || tmp >= F(factor))) { + throw std::invalid_argument("The discretisation of the input coordinate " + std::to_string(x) + + " in a box of size " + std::to_string(F(1) / inv_box_size) + + " produced the floating-point value " + std::to_string(tmp) + + ", which is outside the allowed bounds"); + } } // Cast to UInt. 
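A standalone sketch of the clamped discretisation path introduced above, hard-coding cbits = 21 (the 3D, 64-bit case) purely for illustration:

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstdint>

std::uint64_t disc_clamped(double x, double inv_box_size)
{
    constexpr std::uint64_t factor = std::uint64_t(1) << 21;
    // Rescale from [-box_size/2, box_size/2] to [0, factor), clamping
    // out-of-domain values to the boundary instead of throwing.
    auto tmp = std::fma(x, inv_box_size, 0.5) * double(factor);
    tmp = std::clamp(tmp, 0., std::nextafter(double(factor), -1.));
    return static_cast<std::uint64_t>(tmp);
}

int main()
{
    // A coordinate far outside a box of size 1 snaps to the last cell.
    assert(disc_clamped(100., 1.) == (std::uint64_t(1) << 21) - 1u);
    // The box centre maps to the middle of the discretised range.
    assert(disc_clamped(0., 1.) == (std::uint64_t(1) << 21) / 2u);
}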
@@ -575,6 +593,7 @@ inline constexpr unsigned default_ncrit = 128 #endif ; + } // namespace detail namespace kwargs @@ -621,9 +640,13 @@ using f_vector = std::vector // can try to ensure that the TBB threads are scheduled with the same affinity as the affinity used to write initially // into the particle data vectors. TBB has an affinity partitioner, but it's not clear to me if we can rely on that // for efficient NUMA access. It's probably better to run some tests before embarking in this. -// - we should probably also think about replacing the morton encoder with some generic solution. It does not -// need to be super high performance, as morton encoding is hardly a bottleneck here. It's more important for it -// to be generic (i.e., work on a general number of dimensions), correct and compact. +// - we should think about replacing eventually the current morton encoder. We could try to move either +// to a fully generic solution (although it's not clear what the practical benefits would be) or to a +// higher-performance one focused on 2d/3d cases - particularly, towards vectorization: +// https://lemire.me/blog/2018/01/09/how-fast-can-you-bit-interleave-32-bit-integers-simd-edition/ +// Currently morton encoding is not really a bottleneck, however there might be some performance gains +// when vectorizing, e.g., in the collision code where we end up encoding all the AABB vertices +// of each particle. // - double precision benchmarking/tuning. // - tuning for the potential computation (possibly not much improvement to be had there, but it should be investigated // a bit at least). @@ -633,10 +656,13 @@ using f_vector = std::vector // will fail often). It's probably best to start experimenting with such size as a free parameter, check the // performance with various values and then try to understand if there's any heuristic we can deduce from that. // - quadrupole moments. -// - radix sort. +// - radix sort, or perhaps some type of sort which takes better advantage of almost-sorted data. // - would be interesting to see if we can do the permutations in-place efficiently. If that worked, it would probably // help simplifying things on the GPU side. See for instance: // https://stackoverflow.com/questions/7365814/in-place-array-reordering +// - some vectorisation in the AABB overlap checks should be possible, especially when we are doing +// overlap checks on the original particles in a leaf node (whose coordinates we can easily load in +// SIMD batches). 
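For reference, the kind of bit interleaving the morton-encoder note above refers to looks as follows in a scalar 2D version; the layout with the x bits in the even positions is one common convention, not necessarily the one used by rakau's encoder:

#include <cassert>
#include <cstdint>

// Spread the lower 16 bits of x so that they occupy the even bit positions.
std::uint32_t part1by1(std::uint32_t x)
{
    x &= 0x0000ffffu;
    x = (x | (x << 8)) & 0x00ff00ffu;
    x = (x | (x << 4)) & 0x0f0f0f0fu;
    x = (x | (x << 2)) & 0x33333333u;
    x = (x | (x << 1)) & 0x55555555u;
    return x;
}

std::uint32_t morton2d(std::uint32_t x, std::uint32_t y)
{
    return part1by1(x) | (part1by1(y) << 1);
}

int main()
{
    // x = 0b11, y = 0b01 -> interleaved bits (y1 x1 y0 x0) = 0b0111.
    assert(morton2d(0b11u, 0b01u) == 0b0111u);
}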
template class tree { @@ -3470,6 +3496,37 @@ class tree accs_pots_o(acc_pot_ilist_to_array<2>(out), mac_value, std::forward(args)...); } +private: + auto coll_leaves_permutation() const; + template + void compute_cgraph_impl(std::vector> &out, It) const; + +public: + template + std::vector> compute_cgraph_u(It it) const + { + std::vector> retval; + compute_cgraph_impl(retval, it); + return retval; + } + template + std::vector> compute_cgraph_o(It it) const + { + std::vector> retval; + compute_cgraph_impl(retval, it); + return retval; + } + template + void compute_cgraph_u(std::vector> &out, It it) const + { + compute_cgraph_impl(out, it); + } + template + void compute_cgraph_o(std::vector> &out, It it) const + { + compute_cgraph_impl(out, it); + } + private: template auto exact_acc_pot_impl(size_type orig_idx, F G, F eps) const @@ -3835,4 +3892,6 @@ using octree = tree<3, F, std::size_t, MAC>; } // namespace rakau +#include + #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 900b108..62f97d0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -13,7 +13,9 @@ function(ADD_RAKAU_TESTCASE arg1) endif() add_executable(${arg1} ${arg1}.cpp) - target_link_libraries(${arg1} rakau) + # NOTE: explicitly link to tbb as we are using + # it to parallelise some tests. + target_link_libraries(${arg1} rakau TBB::tbb) target_compile_options(${arg1} PRIVATE "$<$:${RAKAU_CXX_FLAGS_DEBUG}>" "$<$:${RAKAU_CXX_FLAGS_RELEASE}>") set_target_properties(${arg1} PROPERTIES CXX_VISIBILITY_PRESET hidden) set_target_properties(${arg1} PROPERTIES VISIBILITY_INLINES_HIDDEN TRUE) @@ -28,6 +30,7 @@ ADD_RAKAU_TESTCASE(accuracy_acc_pot) ADD_RAKAU_TESTCASE(accuracy_pot) ADD_RAKAU_TESTCASE(auto_box_size) ADD_RAKAU_TESTCASE(basic) +ADD_RAKAU_TESTCASE(coll) ADD_RAKAU_TESTCASE(g_constant_acc) ADD_RAKAU_TESTCASE(g_constant_acc_pot) ADD_RAKAU_TESTCASE(g_constant_pot) diff --git a/test/coll.cpp b/test/coll.cpp new file mode 100644 index 0000000..3e19e31 --- /dev/null +++ b/test/coll.cpp @@ -0,0 +1,446 @@ +// Copyright 2018 Francesco Biscani (bluescarni@gmail.com) +// +// This file is part of the rakau library. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
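A minimal usage sketch of the new collision-graph API, modelled on the test below; the quadtree<double> alias with a defaulted MAC parameter and the specific particle data are assumptions made purely for illustration:

#include <vector>

#include <rakau/tree.hpp>

using namespace rakau;
using namespace rakau::kwargs;

int main()
{
    // Two particles close to the centre of a unit box.
    const std::vector<double> x{0.1, 0.2}, y{0.1, 0.2}, m{1., 1.};

    quadtree<double> t{x_coords = x.data(), y_coords = y.data(), masses = m.data(),
                       nparts = 2u, box_size = 1.};

    // One AABB size per particle, in the original particle order.
    const std::vector<double> aabb_sizes{0.25, 0.25};

    // cgraph[i] lists the indices of the particles whose AABBs overlap
    // the AABB of particle i.
    const auto cgraph = t.compute_cgraph_o(aabb_sizes.data());

    return (cgraph[0].size() == 1u && cgraph[1].size() == 1u) ? 0 : 1;
}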
+ +#include + +#define CATCH_CONFIG_MAIN +#include "catch.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include "test_utils.hpp" + +using namespace rakau; +using namespace rakau::kwargs; +using namespace rakau_test; + +std::mt19937 rng; + +template +inline void compare_results(const CGraph &cgraph, Cmp &cmp) +{ + REQUIRE(cgraph.size() == cmp.size()); + + std::atomic n_failures(0); + std::atomic n_coll(0); + + tbb::parallel_for(tbb::blocked_range(decltype(cgraph.size())(0), cgraph.size()), + [&n_failures, &cgraph, &cmp, &n_coll](const auto &r) { + auto loc_n_coll = 0ull; + + for (auto i = r.begin(); i != r.end(); ++i) { + if (cgraph[i].size() != cmp[i].size()) { + ++n_failures; + } + + std::sort(cmp[i].begin(), cmp[i].end()); + if (!std::equal(cmp[i].begin(), cmp[i].end(), cgraph[i].begin())) { + ++n_failures; + } + + loc_n_coll += cmp[i].size(); + } + + n_coll += loc_n_coll; + }); + + REQUIRE(n_failures.load() == 0); + std::cout << "Total number of collisions detected: " << n_coll.load() / 2u << '\n'; +} + +TEST_CASE("compute_cgraph_2d") +{ + constexpr auto bsize = 1.; + constexpr auto s = 200u; + + auto aabb_overlap = [](auto x1, auto y1, auto s1, auto x2, auto y2, auto s2) { + if (s1 == 0 || s2 == 0) { + return false; + } + + auto xmin1 = x1 - s1 / 2; + auto xmax1 = x1 + s1 / 2; + auto ymin1 = y1 - s1 / 2; + auto ymax1 = y1 + s1 / 2; + + auto xmin2 = x2 - s2 / 2; + auto xmax2 = x2 + s2 / 2; + auto ymin2 = y2 - s2 / 2; + auto ymax2 = y2 + s2 / 2; + + return xmax1 >= xmin2 && xmin1 <= xmax2 && ymax1 >= ymin2 && ymin1 <= ymax2; + }; + + std::vector aabb_sizes(s, 1.); + + // Start with an empty tree. + quadtree t; + REQUIRE(t.compute_cgraph_o(aabb_sizes.data()).empty()); + REQUIRE(t.compute_cgraph_u(aabb_sizes.data()).empty()); + + // Test with various leaf node sizes. + for (auto mln : {1, 4, 8, 16, 400}) { + // Test with a variety of aabb sizes, starting from + // very small until encompassing the whole domain. + for (long k = 15; k >= -1; --k) { + std::cout << "Testing mln=" << mln << ", k=" << k << '\n'; + + // Fill with random data. + auto parts = get_uniform_particles<2>(s, bsize, rng); + + const auto xc_o = parts.begin() + s; + const auto yc_o = parts.begin() + 2u * s; + + t = quadtree{x_coords = xc_o, y_coords = yc_o, masses = parts.begin(), + nparts = s, box_size = bsize, max_leaf_n = mln}; + + const auto [xc_u, yc_u, m_u] = t.p_its_u(); + detail::ignore(m_u); + + // Collision graph that will be computed with the N**2 algorithm. + std::vector> cmp; + cmp.resize(s); + + // All aabbs same (nonzero) size. + // NOTE: for k == -1, pick an AABB size much larger than the domain. + const auto aabb_size = k >= 0 ? 1. / static_cast(1l << k) : 10.; + std::fill(aabb_sizes.begin(), aabb_sizes.end(), aabb_size); + + // Unordered testing. + auto cgraph_u = t.compute_cgraph_u(aabb_sizes.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_u[i], yc_u[i], aabb_size, xc_u[j], yc_u[j], aabb_size)) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_u, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + // Ordered testing. + auto cgraph_o = t.compute_cgraph_o(aabb_sizes.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_o[i], yc_o[i], aabb_size, xc_o[j], yc_o[j], aabb_size)) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_o, cmp); + // Clear cmp. 
+ for (auto &v : cmp) { + v.clear(); + } + + // Set the aabb size to zero for half the points. + for (auto i = 0u; i < s; i += 2u) { + aabb_sizes[i] = 0; + } + // Build a version of aabb_sizes permuted according to the tree order. + auto aabb_sizes_u(aabb_sizes); + for (auto i = 0u; i < s; ++i) { + aabb_sizes_u[i] = aabb_sizes[t.perm()[i]]; + } + + // Redo the testing, this time with retvals passed in. + t.compute_cgraph_u(cgraph_u, aabb_sizes_u.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_u[i], yc_u[i], aabb_sizes_u[i], xc_u[j], yc_u[j], aabb_sizes_u[j])) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_u, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + t.compute_cgraph_o(cgraph_o, aabb_sizes.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_o[i], yc_o[i], aabb_sizes[i], xc_o[j], yc_o[j], aabb_sizes[j])) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_o, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + // Try different sizes. + for (auto i = 0u; i < s; ++i) { + aabb_sizes[i] += aabb_sizes[i] / (i + 1u); + } + aabb_sizes_u = aabb_sizes; + for (auto i = 0u; i < s; ++i) { + aabb_sizes_u[i] = aabb_sizes[t.perm()[i]]; + } + + cgraph_u = t.compute_cgraph_u(aabb_sizes_u.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_u[i], yc_u[i], aabb_sizes_u[i], xc_u[j], yc_u[j], aabb_sizes_u[j])) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_u, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + cgraph_o = t.compute_cgraph_o(aabb_sizes.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_o[i], yc_o[i], aabb_sizes[i], xc_o[j], yc_o[j], aabb_sizes[j])) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_o, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + // All zero aabb sizes. + std::fill(aabb_sizes.begin(), aabb_sizes.end(), 0.); + + t.compute_cgraph_u(cgraph_u, aabb_sizes.data()); + for (const auto &c : cgraph_u) { + REQUIRE(c.empty()); + } + + t.compute_cgraph_o(cgraph_o, aabb_sizes.data()); + for (const auto &c : cgraph_o) { + REQUIRE(c.empty()); + } + } + } +} + +TEST_CASE("compute_cgraph_3d") +{ + constexpr auto bsize = 1.; + constexpr auto s = 200u; + + auto aabb_overlap = [](auto x1, auto y1, auto z1, auto s1, auto x2, auto y2, auto z2, auto s2) { + if (s1 == 0 || s2 == 0) { + return false; + } + + auto xmin1 = x1 - s1 / 2; + auto xmax1 = x1 + s1 / 2; + auto ymin1 = y1 - s1 / 2; + auto ymax1 = y1 + s1 / 2; + auto zmin1 = z1 - s1 / 2; + auto zmax1 = z1 + s1 / 2; + + auto xmin2 = x2 - s2 / 2; + auto xmax2 = x2 + s2 / 2; + auto ymin2 = y2 - s2 / 2; + auto ymax2 = y2 + s2 / 2; + auto zmin2 = z2 - s2 / 2; + auto zmax2 = z2 + s2 / 2; + + return xmax1 >= xmin2 && xmin1 <= xmax2 && ymax1 >= ymin2 && ymin1 <= ymax2 && zmax1 >= zmin2 && zmin1 <= zmax2; + }; + + std::vector aabb_sizes(s, 1.); + + // Start with an empty tree. + octree t; + REQUIRE(t.compute_cgraph_o(aabb_sizes.data()).empty()); + REQUIRE(t.compute_cgraph_u(aabb_sizes.data()).empty()); + + // Test with various leaf node sizes. + for (auto mln : {1, 4, 8, 16, 400}) { + // Test with a variety of aabb sizes, starting from + // very small until encompassing the whole domain. 
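The aabb_sizes_u construction in the test above gathers a per-particle quantity, stored in the original order, into the tree's internal order via t.perm(). In isolation, with made-up data, the gather looks like this:

#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    // A per-particle quantity stored in the original particle order.
    const std::vector<double> orig{10., 20., 30.};
    // Permutation mapping tree order to original indices.
    const std::vector<std::size_t> perm{2, 0, 1};

    std::vector<double> tree_order(orig.size());
    for (std::size_t i = 0; i < orig.size(); ++i) {
        tree_order[i] = orig[perm[i]];
    }
    assert((tree_order == std::vector<double>{30., 10., 20.}));
}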
+ for (long k = 15; k >= -1; --k) { + std::cout << "Testing mln=" << mln << ", k=" << k << '\n'; + + // Fill with random data. + auto parts = get_uniform_particles<3>(s, bsize, rng); + + const auto xc_o = parts.begin() + s; + const auto yc_o = parts.begin() + 2u * s; + const auto zc_o = parts.begin() + 3u * s; + + t = octree{x_coords = xc_o, y_coords = yc_o, z_coords = zc_o, masses = parts.begin(), + nparts = s, box_size = bsize, max_leaf_n = mln}; + + const auto [xc_u, yc_u, zc_u, m_u] = t.p_its_u(); + detail::ignore(m_u); + + // Collision graph that will be computed with the N**2 algorithm. + std::vector> cmp; + cmp.resize(s); + + // All aabbs same (nonzero) size. + // NOTE: for k == -1, pick an AABB size much larger than the domain. + const auto aabb_size = k >= 0 ? 1. / static_cast(1l << k) : 10.; + std::fill(aabb_sizes.begin(), aabb_sizes.end(), aabb_size); + + // Unordered testing. + auto cgraph_u = t.compute_cgraph_u(aabb_sizes.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_u[i], yc_u[i], zc_u[i], aabb_size, xc_u[j], yc_u[j], zc_u[j], aabb_size)) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_u, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + // Ordered testing. + auto cgraph_o = t.compute_cgraph_o(aabb_sizes.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_o[i], yc_o[i], zc_o[i], aabb_size, xc_o[j], yc_o[j], zc_o[j], aabb_size)) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_o, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + // Set the aabb size to zero for half the points. + for (auto i = 0u; i < s; i += 2u) { + aabb_sizes[i] = 0; + } + // Build a version of aabb_sizes permuted according to the tree order. + auto aabb_sizes_u(aabb_sizes); + for (auto i = 0u; i < s; ++i) { + aabb_sizes_u[i] = aabb_sizes[t.perm()[i]]; + } + + // Redo the testing. + t.compute_cgraph_u(cgraph_u, aabb_sizes_u.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_u[i], yc_u[i], zc_u[i], aabb_sizes_u[i], xc_u[j], yc_u[j], zc_u[j], + aabb_sizes_u[j])) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_u, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + t.compute_cgraph_o(cgraph_o, aabb_sizes.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_o[i], yc_o[i], zc_o[i], aabb_sizes[i], xc_o[j], yc_o[j], zc_o[j], + aabb_sizes[j])) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_o, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + // Try different sizes. + for (auto i = 0u; i < s; ++i) { + aabb_sizes[i] += aabb_sizes[i] / (i + 1u); + } + aabb_sizes_u = aabb_sizes; + for (auto i = 0u; i < s; ++i) { + aabb_sizes_u[i] = aabb_sizes[t.perm()[i]]; + } + + cgraph_u = t.compute_cgraph_u(aabb_sizes_u.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_u[i], yc_u[i], zc_u[i], aabb_sizes_u[i], xc_u[j], yc_u[j], zc_u[j], + aabb_sizes_u[j])) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_u, cmp); + // Clear cmp. 
+ for (auto &v : cmp) { + v.clear(); + } + + cgraph_o = t.compute_cgraph_o(aabb_sizes.data()); + for (auto i = 0u; i < s; ++i) { + for (auto j = i + 1u; j < s; ++j) { + if (aabb_overlap(xc_o[i], yc_o[i], zc_o[i], aabb_sizes[i], xc_o[j], yc_o[j], zc_o[j], + aabb_sizes[j])) { + cmp[i].push_back(j); + cmp[j].push_back(i); + } + } + } + compare_results(cgraph_o, cmp); + // Clear cmp. + for (auto &v : cmp) { + v.clear(); + } + + // All zero aabb sizes. + std::fill(aabb_sizes.begin(), aabb_sizes.end(), 0.); + + t.compute_cgraph_u(cgraph_u, aabb_sizes.data()); + for (const auto &c : cgraph_u) { + REQUIRE(c.empty()); + } + + t.compute_cgraph_o(cgraph_o, aabb_sizes.data()); + for (const auto &c : cgraph_o) { + REQUIRE(c.empty()); + } + } + } +} diff --git a/tools/install_travis.sh b/tools/install_travis.sh index 5833dc9..b797d38 100755 --- a/tools/install_travis.sh +++ b/tools/install_travis.sh @@ -25,13 +25,16 @@ elif [[ "${RAKAU_BUILD}" == "gcc7_debug" ]]; then elif [[ "${RAKAU_BUILD}" == "gcc7_debug_nosimd" ]]; then CXX=g++-7 cmake -DCMAKE_INSTALL_PREFIX=$deps_dir -DCMAKE_PREFIX_PATH=$deps_dir -DCMAKE_BUILD_TYPE=Debug -DRAKAU_BUILD_TESTS=yes -DCMAKE_CXX_FLAGS="-D_GLIBCXX_DEBUG -D_GLIBCXX_DEBUG_PEDANTIC -DRAKAU_DISABLE_SIMD" -DRAKAU_TEST_NSPLIT=${TEST_NSPLIT} -DRAKAU_TEST_SPLIT_NUM=${SPLIT_TEST_NUM} ../; make -j2 VERBOSE=1; - ctest -V; + # Don't run the collision test, as there's no SIMD-specific code in there. + ctest -V -E coll; elif [[ "${RAKAU_BUILD}" == "gcc7_debug_native" ]]; then CXX=g++-7 cmake -DCMAKE_INSTALL_PREFIX=$deps_dir -DCMAKE_PREFIX_PATH=$deps_dir -DCMAKE_BUILD_TYPE=Debug -DRAKAU_BUILD_TESTS=yes -DCMAKE_CXX_FLAGS="-D_GLIBCXX_DEBUG -D_GLIBCXX_DEBUG_PEDANTIC -march=native" -DRAKAU_TEST_NSPLIT=${TEST_NSPLIT} -DRAKAU_TEST_SPLIT_NUM=${SPLIT_TEST_NUM} ../; make -j2 VERBOSE=1; - ctest -V; + # Don't run the collision test, as there's no SIMD-specific code in there. + ctest -V -E coll; elif [[ "${RAKAU_BUILD}" == "gcc7_debug_native_norsqrt" ]]; then CXX=g++-7 cmake -DCMAKE_INSTALL_PREFIX=$deps_dir -DCMAKE_PREFIX_PATH=$deps_dir -DCMAKE_BUILD_TYPE=Debug -DRAKAU_BUILD_TESTS=yes -DRAKAU_ENABLE_RSQRT=no -DCMAKE_CXX_FLAGS="-D_GLIBCXX_DEBUG -D_GLIBCXX_DEBUG_PEDANTIC -march=native" -DRAKAU_TEST_NSPLIT=${TEST_NSPLIT} -DRAKAU_TEST_SPLIT_NUM=${SPLIT_TEST_NUM} ../; make -j2 VERBOSE=1; - ctest -V; + # Don't run the collision test, as there's no SIMD-specific code in there. + ctest -V -E coll; fi