Skip to content

Commit

Permalink
Merge pull request #265 from devreal/ttg-device-support-master-coro-w…
Browse files Browse the repository at this point in the history
…ith-stream-tasks

TTG device tasks with coroutines
  • Loading branch information
evaleev authored Feb 24, 2024
2 parents c2cae26 + 56ed942 commit f89cf45
Show file tree
Hide file tree
Showing 79 changed files with 4,731 additions and 2,598 deletions.
32 changes: 7 additions & 25 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ jobs:
fail-fast: false
matrix:
build_type : [ Release, Debug ]
os : [ macos-latest, ubuntu-20.04 ]
os : [ macos-latest, ubuntu-22.04 ]
include:
- os: ubuntu-20.04
cc: /usr/bin/gcc-10
cxx: /usr/bin/g++-10
- os: ubuntu-22.04
cc: /usr/bin/gcc-12
cxx: /usr/bin/g++-12
- os: macos-latest
cc: clang
cxx: clang++
Expand Down Expand Up @@ -48,37 +48,19 @@ jobs:
run: brew install ninja gcc@10 boost eigen open-mpi bison ccache

- name: Install prerequisites Ubuntu packages
if: ${{ matrix.os == 'ubuntu-20.04' }}
if: ${{ matrix.os == 'ubuntu-22.04' }}
run: |
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | gpg --dearmor - | sudo tee /etc/apt/trusted.gpg.d/kitware.gpg >/dev/null
sudo apt-add-repository "deb https://apt.kitware.com/ubuntu/ $(lsb_release -cs) main"
sudo apt-get update
sudo apt-get -y install ninja-build g++-10 liblapack-dev libboost-dev libboost-serialization-dev libeigen3-dev openmpi-bin libopenmpi-dev libtbb-dev ccache flex bison cmake
sudo apt-get -y install ninja-build g++-12 liblapack-dev libboost-dev libboost-serialization-dev libboost-random-dev libeigen3-dev openmpi-bin libopenmpi-dev libtbb-dev ccache flex bison cmake doxygen
- name: Create Build Environment
# Some projects don't allow in-source building, so create a separate build directory
# We'll use this as our working directory for all subsequent commands
run: |
cmake -E make_directory ${{github.workspace}}/build
- name: Install doxygen for Release test
if: ${{ matrix.os == 'ubuntu-20.04' }}
run: |
if [ "${{matrix.build_type}}" = "Release" ]; then
sudo apt-get -y install libclang1-9 libclang-cpp9 graphviz fonts-liberation
cd ${{github.workspace}}/build
# If we fail getting doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz from sourceforge,
# use EFV's gdrive mirror of 1.9.2 to work around the unreliable sourceforge
# the sharing link: https://drive.google.com/file/d/16GXpH4YOEUxGXQrXOKdAIibhdfzATY0d/view?usp=sharing
wget https://downloads.sourceforge.net/project/doxygen/rel-${DOXYGEN_VERSION}/doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz || wget -4 --no-check-certificate -O doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz "https://drive.google.com/uc?export=download&id=16GXpH4YOEUxGXQrXOKdAIibhdfzATY0d"
tar xzf ./doxygen-${DOXYGEN_VERSION}.linux.bin.tar.gz
export DOXYGEN_DIR=${{github.workspace}}/build/doxygen-${DOXYGEN_VERSION}
${DOXYGEN_DIR}/bin/doxygen --version
# doxygen should be in PATH in subsequent steps
echo "${DOXYGEN_DIR}/bin" >> $GITHUB_PATH
fi
- name: Prepare ccache timestamp
id: ccache_cache_timestamp
shell: cmake -P {0}
Expand Down Expand Up @@ -144,7 +126,7 @@ jobs:
cmake --build test_install_userexamples/build
- name: Build+Deploy Dox
if: ${{ matrix.os == 'ubuntu-20.04' && matrix.build_type == 'Release' && github.ref == 'refs/heads/master' }}
if: ${{ matrix.os == 'ubuntu-22.04' && matrix.build_type == 'Release' && github.ref == 'refs/heads/master' }}
working-directory: ${{github.workspace}}/build
shell: bash
run: |
Expand Down
75 changes: 73 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ project(ttg

# TTG is written in C++20: default CMAKE_CXX_STANDARD to 20 when the user has not
# chosen a standard, and reject any user-provided standard that predates C++20.
if (NOT DEFINED CMAKE_CXX_STANDARD)
  set(CMAKE_CXX_STANDARD "20" CACHE STRING "The C++ standard")
# C++98 is spelled "98", which compares numerically greater than 20, so it has to be
# rejected explicitly; the variable is named bare so that if() dereferences it safely
# even when its value is empty.
elseif (CMAKE_CXX_STANDARD LESS 20 OR CMAKE_CXX_STANDARD EQUAL 98)
  message(FATAL_ERROR "TTG requires C++ compiler with C++20, but CMAKE_CXX_STANDARD is set to ${CMAKE_CXX_STANDARD}; bump up CMAKE_CXX_STANDARD to 20 or above")
endif()
if (NOT CMAKE_CXX_EXTENSIONS)
set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "The C++ standard extensions allowed?")
Expand All @@ -52,10 +54,13 @@ set(CMAKE_INSTALL_CMAKEDIR "lib/cmake/ttg"
########################################
# Serialization method selection for the PaRSEC backend (enabled by default).
option(TTG_PARSEC_USE_BOOST_SERIALIZATION "Whether to select Boost serialization methods in PaRSEC backend" ON)
# Device programming models: each one is opt-in and gates the corresponding
# detection logic further down in this file.
option(TTG_ENABLE_CUDA "Whether to TTG will look for CUDA" OFF)
option(TTG_ENABLE_HIP "Whether to TTG will look for HIP" OFF)
option(TTG_ENABLE_LEVEL_ZERO "Whether to TTG will look for Intel oneAPI Level Zero" OFF)
option(TTG_EXAMPLES "Whether to build examples" OFF)
# When ON, -fsanitize=address is added to both the compile and the link options.
option(TTG_ENABLE_ASAN "Whether to enable address sanitizer" OFF)

option(TTG_FETCH_BOOST "Whether to fetch+build Boost, if missing" OFF)
option(TTG_IGNORE_BUNDLED_EXTERNALS "Whether to skip installation and use of bundled external depenedencies (Boost.CallableTraits)" OFF)
option(TTG_IGNORE_BUNDLED_EXTERNALS "Whether to skip installation and use of bundled external dependencies (Boost.CallableTraits)" OFF)
option(TTG_ENABLE_TRACE "Whether to enable ttg::trace() output" OFF)
# See https://medium.com/@alasher/colored-c-compiler-output-with-ninja-clang-gcc-10bfe7f2b949
option (FORCE_COLORED_OUTPUT "Always produce ANSI-colored output (GNU/Clang only)." TRUE)
Expand All @@ -67,6 +72,12 @@ if (FORCE_COLORED_OUTPUT)
endif ()
endif (FORCE_COLORED_OUTPUT)

# Address sanitizer instrumentation (opt-in via TTG_ENABLE_ASAN): the flag is
# passed both to the compiler and to the linker.
if (TTG_ENABLE_ASAN)
  add_link_options(-fsanitize=address)
  add_compile_options(-fsanitize=address)
endif ()

# Preprocessor symbol that selects the HIP platform; kept in the cache so that it
# can be overridden at configure time.
set(TTG_HIP_PLATFORM
    "__HIP_PLATFORM_AMD__"
    CACHE STRING
    "Which platform to use when compiling HIP-related code (default: __HIP_PLATFORM_AMD__)")
##########################
#### prerequisites
##########################
Expand All @@ -82,7 +93,7 @@ endif (BUILD_TESTING)
#### optional prerequisites
###########################
# Boost
include(FindOrFetchBoost)
include("${PROJECT_SOURCE_DIR}/cmake/modules/FindOrFetchBoost.cmake")
# Cereal
#include(FindOrFetchCereal)
# C++ coroutines
Expand All @@ -104,7 +115,53 @@ if (TTG_ENABLE_CUDA)
if (TARGET CUDA::cudart)
set(TTG_HAVE_CUDART True CACHE BOOL "TTG supports execution on CUDA devices")
endif()
endif(TTG_ENABLE_CUDA)

# Optional HIP/ROCm support: enables the HIP language when a HIP compiler is
# available and probes for the hipBLAS and hipSolver packages.
if (TTG_ENABLE_HIP)
  # HIP LANGUAGE introduced in 3.21
  # NOTE(review): besides enforcing the version, cmake_minimum_required() also
  # re-applies policy defaults for 3.21 from this point on -- confirm this is intended.
  cmake_minimum_required(VERSION 3.21)
  include(CheckLanguage)
  check_language(HIP)

  # TTG_HAVE_HIP is a BOOL cache entry, so store a genuine TRUE/FALSE in it rather
  # than the compiler path (or "NOTFOUND") that check_language() leaves in
  # CMAKE_HIP_COMPILER.
  if (CMAKE_HIP_COMPILER)
    enable_language(HIP)
    set(_ttg_have_hip TRUE)
  else ()
    set(_ttg_have_hip FALSE)
  endif ()
  set(TTG_HAVE_HIP ${_ttg_have_hip} CACHE BOOL "True if TTG supports compiling .hip files")

  # Both packages are optional; availability is detected through their imported targets.
  find_package(hipblas)
  if (TARGET roc::hipblas)
    set(TTG_HAVE_HIPBLAS True CACHE BOOL "TTG detected support for hipBLAS")
  endif ()

  find_package(hipsolver)
  if (TARGET roc::hipsolver)
    set(TTG_HAVE_HIPSOLVER True CACHE BOOL "TTG detected support for hipSolver")
  endif ()

  # Define the HIP platform selection macro for everything compiled below this point.
  add_compile_definitions(${TTG_HIP_PLATFORM})
endif ()

# Optional Intel oneAPI Level Zero support. DPC++ and MKL are only looked for
# once Level Zero itself has been found.
if (TTG_ENABLE_LEVEL_ZERO)
  find_package(level-zero)
  # presumably the level-zero find module reports through LEVEL_ZERO_FOUND -- TODO confirm
  set(TTG_HAVE_LEVEL_ZERO ${LEVEL_ZERO_FOUND} CACHE BOOL "True if TTG provide support for Intel Level Zero")

  if (TTG_HAVE_LEVEL_ZERO)
    include_directories("${LEVEL_ZERO_INCLUDE_DIR}/level_zero/")

    find_package(DPCPP)
    if (DPCPP_EXECUTABLE)
      set(TTG_HAVE_DPCPP TRUE CACHE BOOL "True if TTG knows how to compile DPCPP code")
      message(STATUS "Found Intel level-zero ${LEVEL_ZERO_VERSION} in -I${LEVEL_ZERO_INCLUDE_DIR} / -L${LEVEL_ZERO_LIBRARY_DIR}")
      message(STATUS "Found dpcpp in ${DPCPP_EXECUTABLE}")

      # MKL is only searched for when a DPC++ compiler is available
      find_package(MKL)
    else ()
      set(TTG_HAVE_DPCPP FALSE CACHE BOOL "True if TTG knows how to compile DPCPP code")
    endif ()
  endif ()
endif ()

# Summarize device support: TRUE as soon as any one of the device programming
# models (CUDA, HIP, Level Zero) is available.
if (TTG_HAVE_CUDA OR TTG_HAVE_HIP OR TTG_HAVE_LEVEL_ZERO)
  set(_ttg_have_device TRUE)
else ()
  set(_ttg_have_device FALSE)
endif ()
set(TTG_HAVE_DEVICE
    ${_ttg_have_device}
    CACHE BOOL
    "True if TTG has support for any device programming model")

##########################
#### prerequisite runtimes
Expand All @@ -120,6 +177,20 @@ if (TARGET MADworld)
message(STATUS "MADNESS_FOUND=1")
endif(TARGET MADworld)

####################################################
#### Check for MPIX_Query_[cuda|rocm]_support
#### Open MPI provides mpi-ext.h for such extensions
#### so check for that first.
####################################################
# Sets TTG_HAVE_MPI to whether MPI was found and TTG_HAVE_MPIEXT to whether the
# MPI installation ships the mpi-ext.h extension header.
find_package(MPI)
if (MPI_FOUND)
  # store the actual result of the search (not the literal string "MPI_FOUND",
  # which if() would always treat as true)
  set(TTG_HAVE_MPI TRUE)
  include(CheckIncludeFiles)
  # the header lives next to mpi.h, so the check needs the MPI include directories
  set(CMAKE_REQUIRED_INCLUDES ${MPI_C_INCLUDE_DIRS})
  check_include_files("mpi-ext.h" TTG_HAVE_MPIEXT)
else ()
  set(TTG_HAVE_MPI FALSE)
  # a plain boolean: generator expressions are not evaluated in variables at
  # configure time, so "$<BOOL:false>" would be a non-empty, truthy string
  set(TTG_HAVE_MPIEXT FALSE)
endif ()

##########################
#### Examples
Expand Down
45 changes: 26 additions & 19 deletions INSTALL.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ $ cmake --build ttg/build --target install
TTG is usable only on POSIX systems.

## mandatory prerequisites
- [CMake](https://cmake.org/), version 3.14 or higher
- C++ compiler with support for the [C++17 standard](http://www.iso.org/standard/68564.html), or a more recent standard. This includes the following compilers:
- [GNU C++](https://gcc.gnu.org/), version 7.0 or higher
- [Clang](https://clang.llvm.org/), version 5 or higher
- [Apple Clang](https://en.wikipedia.org/wiki/Xcode), version 9.3 or higher
- [Intel C++ compiler](https://software.intel.com/en-us/c-compilers), version 19 or higher
- [CMake](https://cmake.org/), version 3.14 or higher; version 3.21 or higher is required to support execution on HIP/ROCm-capable devices.
- C++ compiler with support for the [C++20 standard](https://www.iso.org/standard/79358.html), or a more recent standard. This includes the following compilers:
- [GNU C++](https://gcc.gnu.org/), version 10.0 or higher; GCC is the only compiler that can be used for accelerator programming.
- [Clang](https://clang.llvm.org/), version 10 or higher
- [Apple Clang](https://en.wikipedia.org/wiki/Xcode), version 10.0 or higher
- [Intel C++ compiler](https://software.intel.com/en-us/c-compilers), version 2021.1 or higher
- one or more of the following runtimes:
- [PaRSEC](https://bitbucket.org/icldistcomp/parsec): this distributed-memory runtime is the primary runtime intended for high-performance implementation of TTG
- [MADNESS](https://github.com/m-a-d-n-e-s-s/madness): this distributed-memory runtime is to be used primarily for developmental purposes
Expand All @@ -27,12 +27,15 @@ While the list of prerequisites is short, note that the runtimes have many more
Also: it is _strongly_ recommended that the runtimes are built as parts of the TTG build process (this requires some of the optional prerequisites, listed below). This will make sure that the correct versions of the runtimes are used.

## optional prerequisites
- [Git](https://git-scm.com) 1.8 or later: needed to obtain the source code for PaRSEC or MADNESS runtimes
- [Boost](https://boost.org/) version 1.66 or later: needed to use TTG with classes serializable by the [Boost.Serialization](https://www.boost.org/doc/libs/master/libs/serialization/doc/index.html) library.
- The [Boost.Serialization](https://www.boost.org/doc/libs/master/libs/serialization/doc/index.html) library is not header-only, i.e., it must be compiled.
- If the Boost package is not detected TTG can download and build Boost as part of its build process; to do that configure TTG with the CMake cache variable `TTG_FETCH_BOOST` set to `ON` (e.g., by adding `-DTTG_FETCH_BOOST=ON` to the CMake executable command line)
- *Note to package maintainers*: TTG also requires Boost.CallableTraits; if Boost is not found or built, TTG installs and uses a bundled copy of Boost.CallableTraits. To avoid the installation and use of the bundled Boost.CallableTraits configure TTG with the CMake cache variable `TTG_IGNORE_BUNDLED_EXTERNALS` set to `ON`.
- ([Doxygen](http://www.doxygen.nl/), version 1.8.12 or later: needed for building documentation
- [Git](https://git-scm.com): needed to obtain the source code for any prerequisite built from source code as part of TTG, such as PaRSEC or MADNESS runtimes
- [Boost](https://boost.org/) version 1.81 or later. If the Boost package is not detected TTG can download and build Boost as part of its build process, but this is NOT recommended; you should obtain Boost via the system or third-party package manager. Experts may try to build Boost from source as part of TTG by configuring it with the CMake cache variable `TTG_FETCH_BOOST` set to `ON` (e.g., by adding `-DTTG_FETCH_BOOST=ON` to the CMake executable command line). The following primary Boost libraries/modules (and their transitive dependencies) are used:
- (required) [Boost.CallableTraits](https://www.boost.org/doc/libs/master/libs/callable_traits/doc/html/index.html): used to introspect generic callables given to `make_tt`. P.S. TTG has a bundled copy of `Boost.CallableTraits` which is used and installed if Boost is not found or built from source. To avoid the installation and use of the bundled Boost.CallableTraits configure TTG with the CMake cache variable `TTG_IGNORE_BUNDLED_EXTERNALS` set to `ON`.
- (optional) [Boost.Serialization](https://www.boost.org/doc/libs/master/libs/serialization/doc/index.html): needed to use TTG with classes serializable by the [Boost.Serialization](https://www.boost.org/doc/libs/master/libs/serialization/doc/index.html) library. Note that `Boost.Serialization` is not header-only, i.e., it must be compiled. This is only required if TTG is configured with CMake cache variable `TTG_PARSEC_USE_BOOST_SERIALIZATION` set to `ON`.
- [Doxygen](http://www.doxygen.nl/), version 1.8.12 or later: needed for building documentation.
- for execution on GPGPUs and other accelerators, the following are required:
- [CUDA compiler and runtime](https://developer.nvidia.com/cuda-zone) -- for execution on NVIDIA's CUDA-enabled accelerators. CUDA 11 or later is required.
- [HIP/ROCm compiler and runtime](https://rocm.docs.amd.com/) -- for execution on AMD's ROCm-enabled accelerators.
- [oneAPI DPC++/SYCL/LevelZero compiler and runtime](https://www.intel.com/content/www/us/en/developer/tools/oneapi/overview.html) -- for execution on Intel accelerators.

## transitive prerequisites

Expand Down Expand Up @@ -60,10 +63,14 @@ TTG includes several examples that may require additional prerequisites. These a

## useful cmake cache variables:

| Variable |Default | Description |
|--------------------------------|--------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `BUILD_TESTING` | `ON` | whether target `check-ttg` and its relatives will actually build and run unit tests |
| `TTG_EXAMPLES` | `OFF` | whether target `check-ttg` and its relatives will actually build and run examples; setting this to `ON` will cause detection of several optional prerequisites, and (if missing) building from source |
| `TTG_ENABLE_TRACE` | `OFF` | setting this to `ON` will enable the ability to instrument TTG code for tracing (see `ttg::trace()`, etc.); if this is set to `OFF`, `ttg::trace()` is a no-op |
| `TTG_FETCH_BOOST` | `OFF` | whether to download and build Boost automatically, if missing |
| `TTG_IGNORE_BUNDLED_EXTERNALS` | `OFF` | whether to install and use bundled external dependencies (currently, only Boost.CallableTraits) |
| Variable |Default | Description |
|--------------------------------------|--------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| `TTG_ENABLE_CUDA` | `OFF` | whether to enable CUDA device support |
| `TTG_ENABLE_HIP` | `OFF` | whether to enable HIP/ROCm device support |
| `TTG_ENABLE_LEVEL_ZERO` | `OFF` | whether to enable Intel oneAPI Level Zero device support |
| `BUILD_TESTING` | `ON` | whether target `check-ttg` and its relatives will actually build and run unit tests |
| `TTG_EXAMPLES` | `OFF` | whether target `check-ttg` and its relatives will actually build and run examples; setting this to `ON` will cause detection of several optional prerequisites, and (if missing) building from source |
| `TTG_ENABLE_TRACE` | `OFF` | setting this to `ON` will enable the ability to instrument TTG code for tracing (see `ttg::trace()`, etc.); if this is set to `OFF`, `ttg::trace()` is a no-op |
| `TTG_PARSEC_USE_BOOST_SERIALIZATION` | `ON`               | whether to use Boost.Serialization for serialization for the PaRSEC backend; if this is set to `OFF`, PaRSEC backend will only be able to use trivially-copyable data types or, if MADNESS backend is available, MADNESS-serializable types. |
| `TTG_FETCH_BOOST` | `OFF` | whether to download and build Boost automatically, if missing |
| `TTG_IGNORE_BUNDLED_EXTERNALS` | `OFF` | whether to install and use bundled external dependencies (currently, only Boost.CallableTraits) |
16 changes: 9 additions & 7 deletions cmake/modules/ExternalDependenciesVersions.cmake
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
# for each dependency track both current and previous id (the variable for the latter must contain PREVIOUS)
# to be able to auto-update them

# need Boost.CallableTraits (header only, part of Boost 1.66 released in Dec 2017) for wrap.h to work
set(TTG_TRACKED_BOOST_VERSION 1.66)
set(TTG_TRACKED_CATCH2_VERSION 2.13.1)
set(TTG_TRACKED_VG_CMAKE_KIT_TAG 7ea2d4d3f8854b9e417f297fd74d6fc49aa13fd5) # used to provide "real" FindOrFetchBoost
set(TTG_TRACKED_CATCH2_VERSION 3.5.0)
set(TTG_TRACKED_CEREAL_VERSION 1.3.0)
set(TTG_TRACKED_MADNESS_TAG 31d803325623de75371774feffb0270c796bea24)
set(TTG_TRACKED_PARSEC_TAG 9fc74b6f165605a133125d8a5b62cf55642c1907)
set(TTG_TRACKED_BTAS_TAG d73153ad9bc41a177e441ef04eceff7fab0c766d)
set(TTG_TRACKED_TILEDARRAY_TAG f6f5039b54ee5a1180106351cac05e0d33e14c62)
set(TTG_TRACKED_MADNESS_TAG 2eb3bcf0138127ee2dbc651f1aabd3e9b0def4e3)
set(TTG_TRACKED_PARSEC_TAG 0b3140f58ad9dc78a3d64da9fd73ecc7f443ece7)
set(TTG_TRACKED_BTAS_TAG 4e8f5233aa7881dccdfcc37ce07128833926d3c2)
set(TTG_TRACKED_TILEDARRAY_TAG 493c109379a1b64ddd5ef59f7e33b95633b68d73)

# need Boost.CallableTraits (header only, part of Boost 1.66 released in Dec 2017) for wrap.h to work
set(TTG_OLDEST_BOOST_VERSION 1.66)
Loading

0 comments on commit f89cf45

Please sign in to comment.