-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Give a first attack on parallel RCM algorithm implementation.
- Loading branch information
1 parent
5743cb9
commit e7e8840
Showing
12 changed files
with
2,224 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
# Top-level build script for the "rcm" project: pulls in the helper macros
# from cmake/SBELUtils.cmake, enables CUDA and declares the driver_um and
# testing executables.
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(rcm)
INCLUDE(cmake/SBELUtils.cmake)

# Place all built executables under <build dir>/bin.
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)

# Macro provided by cmake/SBELUtils.cmake.
enable_cuda_support()
MESSAGE(STATUS "Cuda version: ${CUDA_VERSION}")

# Host-side headers of the RCM code.
SET(RCM_HEADERS
  rcm/common.h
  rcm/rcm_um.h
  rcm/exception.h
  rcm/timer.h
)

# CUDA headers of the RCM code.
SET(RCM_CUHEADERS
  rcm/device/kernels.cuh
)

# Sources from the mm_io directory.
SET(MMIO_FILES
  mm_io/mm_io.h
  mm_io/mm_io.c
)

# IDE source groups. These must reference the RCM_* lists defined above;
# the previously used MC64_* names are not defined anywhere in this file,
# so the groups (and the executables below) silently received no headers.
SOURCE_GROUP("Headers" FILES ${RCM_HEADERS})
SOURCE_GROUP("CUDA Headers" FILES ${RCM_CUHEADERS})
SOURCE_GROUP("MM_IO" FILES ${MMIO_FILES})

INCLUDE_DIRECTORIES(
  ${CMAKE_SOURCE_DIR}
)

# Both executables are only built with CUDA 6.0 or newer
# (presumably because rcm_um.h relies on unified memory -- TODO confirm).
# CUDA_VERSION is quoted so that an empty value cannot turn the condition
# into a syntax error.
IF(NOT ("${CUDA_VERSION}" VERSION_LESS "6.0"))
  cuda_add_executable(driver_um driver_um.cu ${RCM_HEADERS} ${RCM_CUHEADERS} ${MMIO_FILES})
  cuda_add_executable(testing testing.cu ${RCM_HEADERS} ${RCM_CUHEADERS} ${MMIO_FILES})
ELSE()
  # Make it visible why nothing gets built instead of skipping silently.
  MESSAGE(WARNING "CUDA ${CUDA_VERSION} is older than 6.0: driver_um and testing will not be built.")
ENDIF()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#################################################### | ||
## Only modify if you know what you're doing. ## | ||
#################################################### | ||
|
||
|
||
# Helps Eclipse/CDT find our include directories
set(CMAKE_VERBOSE_MAKEFILE on)

# Detect the bitness of our machine (eg 32- or 64-bit)
# C-equiv: 8*sizeof(void*)
# The expression is quoted so it always reaches math() as a single argument.
math(EXPR CMAKE_ARCH_BITNESS "8*${CMAKE_SIZEOF_VOID_P}")

# For non-multi-configuration generators (eg, make, Eclipse)
# The Visual Studio generator creates a single project with all these
# Defaults the build type to Release unless the cache already holds a value.
# The configuration name is "RelWithDebInfo"; the previously offered
# "RelWithDebugInfo" is not a configuration CMake knows, so picking it
# selected a build type without any of the standard per-config flags.
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebInfo, MinSizeRel")
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel")
|
||
|
||
#################################################### | ||
## ---------------------------------------------- ## | ||
## - - ## | ||
## - Enable MPI Support - ## | ||
## - - ## | ||
## ---------------------------------------------- ## | ||
#################################################### | ||
|
||
# Begin configuring MPI options | ||
macro(enable_mpi_support)
  # Usage: enable_mpi_support()
  # Finds MPI (configuration fails if it is missing), appends MPI's C and C++
  # compile/link flags to the corresponding CMAKE_<LANG>_COMPILE_FLAGS /
  # CMAKE_<LANG>_LINK_FLAGS variables and adds MPI's include paths for the
  # current directory. Being a macro, all variables are set in the caller's
  # scope.
  # NOTE(review): CMAKE_<LANG>_COMPILE_FLAGS does not look like a variable
  # CMake itself reads -- confirm these flags actually reach the compiler.
  find_package(MPI REQUIRED)

  # --- C ---
  list(APPEND CMAKE_C_COMPILE_FLAGS ${MPI_C_COMPILE_FLAGS})
  list(APPEND CMAKE_C_LINK_FLAGS ${MPI_C_LINK_FLAGS})
  include_directories(${MPI_C_INCLUDE_PATH})

  # --- C++ ---
  list(APPEND CMAKE_CXX_COMPILE_FLAGS ${MPI_CXX_COMPILE_FLAGS})
  list(APPEND CMAKE_CXX_LINK_FLAGS ${MPI_CXX_LINK_FLAGS})
  include_directories(${MPI_CXX_INCLUDE_PATH})
endmacro()
# Done configuring MPI Options | ||
|
||
|
||
#################################################### | ||
## ---------------------------------------------- ## | ||
## - - ## | ||
## - Enable OpenMP Support - ## | ||
## - - ## | ||
## ---------------------------------------------- ## | ||
#################################################### | ||
|
||
# Begin configuring OpenMP options | ||
macro(enable_openmp_support)
  # Usage: enable_openmp_support()
  # Finds OpenMP (configuration fails if it is missing) and adds the OpenMP
  # compiler flags to CMAKE_C_FLAGS and CMAKE_CXX_FLAGS in the caller's scope.

  find_package("OpenMP" REQUIRED)

  # CMAKE_<LANG>_FLAGS are space-separated command-line strings, not CMake
  # lists. list(APPEND) would join with ';' and corrupt the compiler command
  # line whenever the variable is already non-empty, so concatenate with a
  # space instead.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")

endmacro(enable_openmp_support)
# Done configuring OpenMP Options | ||
|
||
|
||
#################################################### | ||
## ---------------------------------------------- ## | ||
## - - ## | ||
## - Enable CUDA Support - ## | ||
## - - ## | ||
## ---------------------------------------------- ## | ||
#################################################### | ||
|
||
# Begin configuring CUDA options | ||
# This is ugly... | ||
macro(enable_cuda_support)
  # Usage: enable_cuda_support()
  # Declares the CUDA-related cache options, finds the CUDA package
  # (configuration fails if it is missing) and rebuilds CUDA_NVCC_FLAGS from
  # those options. Being a macro, everything is set in the caller's scope.

  # Clear the "advanced" mark on these entries so they are shown on the
  # default CMake screen.
  mark_as_advanced(CLEAR
    CUDA_BUILD_CUBIN
    CUDA_SDK_ROOT_DIR
    CUDA_TOOLKIT_ROOT_DIR
    CUDA_VERBOSE_BUILD
    CUDA_FAST_MATH
    CUDA_USE_CUSTOM_COMPILER
    CUDA_VERBOSE_PTX
    CUDA_DEVICE_VERSION)

  # Compute Capability to build for.
  # The list of choices must be extended by hand when new CCs appear.
  set(CUDA_DEVICE_VERSION "20" CACHE STRING "CUDA Device Version")
  set_property(CACHE CUDA_DEVICE_VERSION
    PROPERTY STRINGS "10" "11" "12" "13" "20" "21" "30" "35")

  # Fast-math applies to nvcc only (_not_ to GCC).
  set(CUDA_FAST_MATH TRUE CACHE BOOL "Use Fast Math Operations")

  # Optionally let nvcc use a separate host compiler for non-CUDA code,
  # e.g. when an older version of GCC than the default one is required.
  set(CUDA_USE_CUSTOM_COMPILER FALSE CACHE BOOL "Use Custom Compiler")
  set(CUDA_CUSTOM_COMPILER "" CACHE STRING "Custom C++ Compiler for CUDA If Needed")

  # Print register usage etc. while compiling kernels.
  set(CUDA_VERBOSE_PTX TRUE CACHE BOOL "Show Verbose Kernel Info During Compilation")

  # Locate the toolkit.
  find_package(CUDA REQUIRED)

  # Include directories frequently used by the SDK examples.
  cuda_include_directories(
    ${CUDA_SDK_ROOT_DIR}/common/inc
    ${CUDA_SDK_ROOT_DIR}/../shared/inc)

  # Candidate SDK library directories.
  # (The former shrutil/cutil find_library() lookups that used this list
  # are no longer needed.)
  set(CUDA_SDK_LIB_DIR
    ${CUDA_SDK_ROOT_DIR}/common/lib
    ${CUDA_SDK_ROOT_DIR}/lib
    ${CUDA_SDK_ROOT_DIR}/../shared/lib)

  # Start from an empty nvcc flag set on every configure run.
  set(CUDA_NVCC_FLAGS "" CACHE STRING "" FORCE)

  # The custom-compiler path is only shown when the option is switched on.
  if(NOT CUDA_USE_CUSTOM_COMPILER)
    mark_as_advanced(FORCE CUDA_CUSTOM_COMPILER)
  else()
    mark_as_advanced(CLEAR CUDA_CUSTOM_COMPILER)
    list(APPEND CUDA_NVCC_FLAGS "-ccbin=${CUDA_CUSTOM_COMPILER}")
  endif()

  # set_compute_capability(<cc>): add both the sm_<cc> and the compute_<cc>
  # -gencode entries for Compute Capability <cc> to CUDA_NVCC_FLAGS.
  macro(set_compute_capability cc)
    list(APPEND CUDA_NVCC_FLAGS
      "-gencode=arch=compute_${cc},code=sm_${cc}"
      "-gencode=arch=compute_${cc},code=compute_${cc}")
  endmacro()

  # Build for the selected Compute Capability. The main CMakeLists.txt may
  # call set_compute_capability() again to add further CCs.
  set_compute_capability(${CUDA_DEVICE_VERSION})

  # Optional fast-math.
  if(CUDA_FAST_MATH)
    list(APPEND CUDA_NVCC_FLAGS "-use_fast_math")
  endif()

  # Optional verbose ptxas output.
  if(CUDA_VERBOSE_PTX)
    list(APPEND CUDA_NVCC_FLAGS "--ptxas-options=-v")
  endif()
endmacro()
# Done configuring CUDA options |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,166 @@ | ||
#################################################### | ||
## Only modify if you know what you're doing. ## | ||
#################################################### | ||
|
||
|
||
# Helps Eclipse/CDT find our include directories
set(CMAKE_VERBOSE_MAKEFILE on)

# Detect the bitness of our machine (eg 32- or 64-bit)
# C-equiv: 8*sizeof(void*)
# The expression is quoted so it always reaches math() as a single argument.
math(EXPR CMAKE_ARCH_BITNESS "8*${CMAKE_SIZEOF_VOID_P}")

# For non-multi-configuration generators (eg, make, Eclipse)
# The Visual Studio generator creates a single project with all these
# Defaults the build type to Release unless the cache already holds a value.
# The configuration name is "RelWithDebInfo"; the previously offered
# "RelWithDebugInfo" is not a configuration CMake knows, so picking it
# selected a build type without any of the standard per-config flags.
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebInfo, MinSizeRel")
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel")
|
||
|
||
#################################################### | ||
## ---------------------------------------------- ## | ||
## - - ## | ||
## - Enable MPI Support - ## | ||
## - - ## | ||
## ---------------------------------------------- ## | ||
#################################################### | ||
|
||
# Begin configuring MPI options | ||
macro(enable_mpi_support)
  # Usage: enable_mpi_support()
  # Finds MPI (configuration fails if it is missing), appends MPI's C and C++
  # compile/link flags to the corresponding CMAKE_<LANG>_COMPILE_FLAGS /
  # CMAKE_<LANG>_LINK_FLAGS variables and adds MPI's include paths for the
  # current directory. Being a macro, all variables are set in the caller's
  # scope.
  # NOTE(review): CMAKE_<LANG>_COMPILE_FLAGS does not look like a variable
  # CMake itself reads -- confirm these flags actually reach the compiler.
  find_package(MPI REQUIRED)

  # --- C ---
  list(APPEND CMAKE_C_COMPILE_FLAGS ${MPI_C_COMPILE_FLAGS})
  list(APPEND CMAKE_C_LINK_FLAGS ${MPI_C_LINK_FLAGS})
  include_directories(${MPI_C_INCLUDE_PATH})

  # --- C++ ---
  list(APPEND CMAKE_CXX_COMPILE_FLAGS ${MPI_CXX_COMPILE_FLAGS})
  list(APPEND CMAKE_CXX_LINK_FLAGS ${MPI_CXX_LINK_FLAGS})
  include_directories(${MPI_CXX_INCLUDE_PATH})
endmacro()
# Done configuring MPI Options | ||
|
||
|
||
#################################################### | ||
## ---------------------------------------------- ## | ||
## - - ## | ||
## - Enable OpenMP Support - ## | ||
## - - ## | ||
## ---------------------------------------------- ## | ||
#################################################### | ||
|
||
# Begin configuring OpenMP options | ||
macro(enable_openmp_support)
  # Usage: enable_openmp_support()
  # Finds OpenMP (configuration fails if it is missing) and adds the OpenMP
  # compiler flags to CMAKE_C_FLAGS and CMAKE_CXX_FLAGS in the caller's scope.

  find_package("OpenMP" REQUIRED)

  # CMAKE_<LANG>_FLAGS are space-separated command-line strings, not CMake
  # lists. list(APPEND) would join with ';' and corrupt the compiler command
  # line whenever the variable is already non-empty, so concatenate with a
  # space instead.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")

endmacro(enable_openmp_support)
# Done configuring OpenMP Options | ||
|
||
|
||
#################################################### | ||
## ---------------------------------------------- ## | ||
## - - ## | ||
## - Enable CUDA Support - ## | ||
## - - ## | ||
## ---------------------------------------------- ## | ||
#################################################### | ||
|
||
# Begin configuring CUDA options | ||
# This is ugly... | ||
macro(enable_cuda_support)
  # Usage: enable_cuda_support()
  # Declares the CUDA-related cache options, finds the CUDA package
  # (configuration fails if it is missing) and rebuilds CUDA_NVCC_FLAGS from
  # those options. Being a macro, everything is set in the caller's scope.

  # Clear the "advanced" mark on these entries so they are shown on the
  # default CMake screen.
  mark_as_advanced(CLEAR
    CUDA_BUILD_CUBIN
    CUDA_SDK_ROOT_DIR
    CUDA_TOOLKIT_ROOT_DIR
    CUDA_VERBOSE_BUILD
    CUDA_FAST_MATH
    CUDA_USE_CUSTOM_COMPILER
    CUDA_VERBOSE_PTX
    CUDA_DEVICE_VERSION)

  # Compute Capability to build for.
  # The list of choices must be extended by hand when new CCs appear.
  set(CUDA_DEVICE_VERSION "20" CACHE STRING "CUDA Device Version")
  set_property(CACHE CUDA_DEVICE_VERSION
    PROPERTY STRINGS "10" "11" "12" "13" "20" "21" "30" "35")

  # Fast-math applies to nvcc only (_not_ to GCC).
  set(CUDA_FAST_MATH TRUE CACHE BOOL "Use Fast Math Operations")

  # Optionally let nvcc use a separate host compiler for non-CUDA code,
  # e.g. when an older version of GCC than the default one is required.
  set(CUDA_USE_CUSTOM_COMPILER FALSE CACHE BOOL "Use Custom Compiler")
  set(CUDA_CUSTOM_COMPILER "" CACHE STRING "Custom C++ Compiler for CUDA If Needed")

  # Print register usage etc. while compiling kernels.
  set(CUDA_VERBOSE_PTX TRUE CACHE BOOL "Show Verbose Kernel Info During Compilation")

  # Locate the toolkit.
  find_package(CUDA REQUIRED)

  # Include directories frequently used by the SDK examples.
  cuda_include_directories(
    ${CUDA_SDK_ROOT_DIR}/common/inc
    ${CUDA_SDK_ROOT_DIR}/../shared/inc)

  # Candidate SDK library directories.
  # (The former shrutil/cutil find_library() lookups that used this list
  # are no longer needed.)
  set(CUDA_SDK_LIB_DIR
    ${CUDA_SDK_ROOT_DIR}/common/lib
    ${CUDA_SDK_ROOT_DIR}/lib
    ${CUDA_SDK_ROOT_DIR}/../shared/lib)

  # Start from an empty nvcc flag set on every configure run.
  set(CUDA_NVCC_FLAGS "" CACHE STRING "" FORCE)

  # The custom-compiler path is only shown when the option is switched on.
  if(NOT CUDA_USE_CUSTOM_COMPILER)
    mark_as_advanced(FORCE CUDA_CUSTOM_COMPILER)
  else()
    mark_as_advanced(CLEAR CUDA_CUSTOM_COMPILER)
    list(APPEND CUDA_NVCC_FLAGS "-ccbin=${CUDA_CUSTOM_COMPILER}")
  endif()

  # set_compute_capability(<cc>): add both the sm_<cc> and the compute_<cc>
  # -gencode entries for Compute Capability <cc> to CUDA_NVCC_FLAGS.
  macro(set_compute_capability cc)
    list(APPEND CUDA_NVCC_FLAGS
      "-gencode=arch=compute_${cc},code=sm_${cc}"
      "-gencode=arch=compute_${cc},code=compute_${cc}")
  endmacro()

  # Build for the selected Compute Capability. The main CMakeLists.txt may
  # call set_compute_capability() again to add further CCs.
  set_compute_capability(${CUDA_DEVICE_VERSION})

  # Optional fast-math.
  if(CUDA_FAST_MATH)
    list(APPEND CUDA_NVCC_FLAGS "-use_fast_math")
  endif()

  # Optional verbose ptxas output.
  if(CUDA_VERBOSE_PTX)
    list(APPEND CUDA_NVCC_FLAGS "--ptxas-options=-v")
  endif()
endmacro()
# Done configuring CUDA options |
Oops, something went wrong.