Skip to content

Commit

Permalink
Give a first attack on parallel RCM algorithm implementation.
Browse files Browse the repository at this point in the history
  • Loading branch information
ihcinihsdk committed Jun 12, 2014
1 parent 5743cb9 commit e7e8840
Show file tree
Hide file tree
Showing 12 changed files with 2,224 additions and 0 deletions.
37 changes: 37 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Top-level build script for the parallel RCM (reverse Cuthill-McKee) project.
# Builds two CUDA executables, driver_um and testing, when the detected CUDA
# toolkit is version 6.0 or newer.
CMAKE_MINIMUM_REQUIRED(VERSION 2.8)
PROJECT(rcm)
INCLUDE(cmake/SBELUtils.cmake)

# Place all executables under <build>/bin.
SET(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)

# Defined in cmake/SBELUtils.cmake: finds CUDA and populates CUDA_NVCC_FLAGS.
enable_cuda_support()
MESSAGE(STATUS "Cuda version: ${CUDA_VERSION}")

SET(RCM_HEADERS
  rcm/common.h
  rcm/rcm_um.h
  rcm/exception.h
  rcm/timer.h
)

SET(RCM_CUHEADERS
  rcm/device/kernels.cuh
)

SET(MMIO_FILES
  mm_io/mm_io.h
  mm_io/mm_io.c
)

# Use the RCM_* lists defined above. The MC64_* names referenced previously
# are never defined in this file, so they expanded to nothing and the headers
# were silently left out of the IDE groups and the targets.
SOURCE_GROUP("Headers" FILES ${RCM_HEADERS})
SOURCE_GROUP("CUDA Headers" FILES ${RCM_CUHEADERS})
SOURCE_GROUP("MM_IO" FILES ${MMIO_FILES})

INCLUDE_DIRECTORIES(
  ${CMAKE_SOURCE_DIR}
)

# Quote the expansion so the comparison stays well-formed even if
# CUDA_VERSION is unexpectedly empty.
IF(NOT ("${CUDA_VERSION}" VERSION_LESS "6.0"))
  cuda_add_executable(driver_um driver_um.cu ${RCM_HEADERS} ${RCM_CUHEADERS} ${MMIO_FILES})
  cuda_add_executable(testing testing.cu ${RCM_HEADERS} ${RCM_CUHEADERS} ${MMIO_FILES})
ELSE()
  # Make the "nothing gets built" case visible instead of silent.
  MESSAGE(WARNING "CUDA ${CUDA_VERSION} is older than 6.0: driver_um and testing will not be built.")
ENDIF()
166 changes: 166 additions & 0 deletions SBELUtils.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
####################################################
## Only modify if you know what you're doing. ##
####################################################


# Helps Eclipse/CDT find our include directories
set(CMAKE_VERBOSE_MAKEFILE on)

# Detect the bitness of our machine (eg 32- or 64-bit)
# C equivalent: 8 * sizeof(void*)
math(EXPR CMAKE_ARCH_BITNESS 8*${CMAKE_SIZEOF_VOID_P})

# For non-multi-configuration generators (eg, make, Eclipse)
# The Visual Studio generator creates a single project with all these
# This first set() guarantees the cache entry exists so that the
# set_property() call below always has something to attach to.
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebInfo, MinSizeRel")
# A non-FORCE cache set() does nothing when the entry already exists. If the
# entry exists but is empty (for example because project() ran before this
# file was included), the "Release" default would never take effect. Apply it
# explicitly, but only for single-configuration generators and only when the
# user has not chosen a build type.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebInfo, MinSizeRel" FORCE)
endif()
# Drop-down choices for cmake-gui/ccmake. CMake's configuration is named
# "RelWithDebInfo"; "RelWithDebugInfo" is not a recognised build type.
SET_PROPERTY(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel")


####################################################
## ---------------------------------------------- ##
## - - ##
## - Enable MPI Support - ##
## - - ##
## ---------------------------------------------- ##
####################################################

# Begin configuring MPI options
# enable_mpi_support()
#
# Finds MPI (configuration fails if it is missing), adds the MPI include
# paths for C and C++ to the directory include path, and makes the MPI
# compile flags part of the C/C++ compiler flags.
# Takes no arguments. Being a macro, every variable it sets lands in the
# caller's scope.
macro(enable_mpi_support)

  find_package("MPI" REQUIRED)

  # Add the MPI-specific compiler and linker flags
  # Also, search for #includes in MPI's paths

  # Legacy variables, kept so that any caller reading them keeps working.
  list(APPEND CMAKE_C_COMPILE_FLAGS ${MPI_C_COMPILE_FLAGS})
  list(APPEND CMAKE_C_LINK_FLAGS ${MPI_C_LINK_FLAGS})
  include_directories(${MPI_C_INCLUDE_PATH})

  list(APPEND CMAKE_CXX_COMPILE_FLAGS ${MPI_CXX_COMPILE_FLAGS})
  list(APPEND CMAKE_CXX_LINK_FLAGS ${MPI_CXX_LINK_FLAGS})
  include_directories(${MPI_CXX_INCLUDE_PATH})

  # CMAKE_<LANG>_COMPILE_FLAGS is not a variable CMake consumes, so on their
  # own the appends above never reach the compiler. Propagate the compile
  # flags through CMAKE_<LANG>_FLAGS, which is a space-separated string.
  if(MPI_C_COMPILE_FLAGS)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MPI_C_COMPILE_FLAGS}")
  endif()
  if(MPI_CXX_COMPILE_FLAGS)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}")
  endif()

endmacro(enable_mpi_support)
# Done configuring MPI Options


####################################################
## ---------------------------------------------- ##
## - - ##
## - Enable OpenMP Support - ##
## - - ##
## ---------------------------------------------- ##
####################################################

# Begin configuring OpenMP options
# enable_openmp_support()
#
# Finds OpenMP (configuration fails if it is missing) and adds the OpenMP
# compiler flags to the global C and C++ flags.
# Takes no arguments. Being a macro, the flag variables are modified in the
# caller's scope.
macro(enable_openmp_support)

  find_package("OpenMP" REQUIRED)

  # Add the OpenMP-specific compiler and linker flags.
  # CMAKE_<LANG>_FLAGS is a space-separated string, not a list. Using
  # list(APPEND) would insert a ';' between the existing flags and the OpenMP
  # flag and corrupt the compile command line whenever the flags were already
  # non-empty, so concatenate with a space instead.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")

endmacro(enable_openmp_support)
# Done configuring OpenMP Options


####################################################
## ---------------------------------------------- ##
## - - ##
## - Enable CUDA Support - ##
## - - ##
## ---------------------------------------------- ##
####################################################

# Begin configuring CUDA options
# This is ugly...
# enable_cuda_support()
#
# Declares the user-tunable CUDA cache options, finds the CUDA toolkit
# (configuration fails if it is missing) and assembles CUDA_NVCC_FLAGS from
# the selected options: optional host compiler, compute capability,
# fast-math and verbose PTX output.
# Takes no arguments. Being a macro, everything it sets (including the nested
# set_compute_capability macro) is visible to the caller afterwards.
macro(enable_cuda_support)

  # Make these options visible in the default (non-advanced) CMake view:
  # mark_as_advanced(CLEAR ...) removes the "advanced" mark.
  mark_as_advanced(CLEAR CUDA_BUILD_CUBIN)
  mark_as_advanced(CLEAR CUDA_SDK_ROOT_DIR)
  mark_as_advanced(CLEAR CUDA_TOOLKIT_ROOT_DIR)
  mark_as_advanced(CLEAR CUDA_VERBOSE_BUILD)
  mark_as_advanced(CLEAR CUDA_FAST_MATH)
  mark_as_advanced(CLEAR CUDA_USE_CUSTOM_COMPILER)
  mark_as_advanced(CLEAR CUDA_VERBOSE_PTX)
  mark_as_advanced(CLEAR CUDA_DEVICE_VERSION)

  # select Compute Capability
  # This needs to be manually updated when devices with new CCs come out
  set(CUDA_DEVICE_VERSION "20" CACHE STRING "CUDA Device Version")
  set_property(CACHE CUDA_DEVICE_VERSION PROPERTY STRINGS "10" "11" "12" "13" "20" "21" "30" "35")

  # Enable fast-math for CUDA (_not_ GCC)
  set(CUDA_FAST_MATH TRUE CACHE BOOL "Use Fast Math Operations")

  # Tell nvcc to use a separate compiler for non-CUDA code.
  # This is useful if you need to use an older version of GCC than comes by default
  set(CUDA_USE_CUSTOM_COMPILER FALSE CACHE BOOL "Use Custom Compiler")
  set(CUDA_CUSTOM_COMPILER "" CACHE STRING "Custom C++ Compiler for CUDA If Needed")

  # Shows register usage, etc
  set(CUDA_VERBOSE_PTX TRUE CACHE BOOL "Show Verbose Kernel Info During Compilation")


  # Let's get going...
  find_package("CUDA" REQUIRED)

  # Frequently used in the examples
  cuda_include_directories(${CUDA_SDK_ROOT_DIR}/common/inc)
  cuda_include_directories(${CUDA_SDK_ROOT_DIR}/../shared/inc)

  # Candidate library directories inside the CUDA SDK. Nothing in this macro
  # reads the variable; it is left defined for callers.
  set(CUDA_SDK_LIB_DIR ${CUDA_SDK_ROOT_DIR}/common/lib
      ${CUDA_SDK_ROOT_DIR}/lib ${CUDA_SDK_ROOT_DIR}/../shared/lib)

  # Set custom compiler flags
  # NOTE(review): FORCE overwrites whatever CUDA_NVCC_FLAGS value is stored in
  # the cache, including one supplied by the user -- confirm this reset is
  # intentional.
  set(CUDA_NVCC_FLAGS "" CACHE STRING "" FORCE)

  if(CUDA_USE_CUSTOM_COMPILER)
    mark_as_advanced(CLEAR CUDA_CUSTOM_COMPILER)
    if(NOT "${CUDA_CUSTOM_COMPILER}" STREQUAL "")
      list(APPEND CUDA_NVCC_FLAGS "-ccbin=${CUDA_CUSTOM_COMPILER}")
    else()
      # An empty path would hand nvcc a bare "-ccbin=" and fail at build
      # time; skip the flag and tell the user what is missing instead.
      message(WARNING "CUDA_USE_CUSTOM_COMPILER is ON but CUDA_CUSTOM_COMPILER is empty; not passing -ccbin to nvcc.")
    endif()
  else()
    mark_as_advanced(FORCE CUDA_CUSTOM_COMPILER)
  endif()

  # Macro for setting the Compute Capability
  # cc: two-digit compute capability such as 20 or 35. Adds one -gencode
  # entry with code=sm_<cc> and one with code=compute_<cc>.
  macro(set_compute_capability cc)
    list(APPEND CUDA_NVCC_FLAGS "-gencode=arch=compute_${cc},code=sm_${cc}")
    list(APPEND CUDA_NVCC_FLAGS "-gencode=arch=compute_${cc},code=compute_${cc}")
  endmacro(set_compute_capability)

  # Tell nvcc to compile for the selected Compute Capability
  # This can also be called from the main CMakeLists.txt to enable
  # support for additional CCs
  set_compute_capability(${CUDA_DEVICE_VERSION})

  # Enable fast-math if selected
  if(CUDA_FAST_MATH)
    list(APPEND CUDA_NVCC_FLAGS "-use_fast_math")
  endif()

  # Enable verbose compile if selected
  if(CUDA_VERBOSE_PTX)
    list(APPEND CUDA_NVCC_FLAGS "--ptxas-options=-v")
  endif()
endmacro(enable_cuda_support)
# Done configuring CUDA options
166 changes: 166 additions & 0 deletions cmake/SBELUtils.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
####################################################
## Only modify if you know what you're doing. ##
####################################################


# Helps Eclipse/CDT find our include directories
set(CMAKE_VERBOSE_MAKEFILE on)

# Detect the bitness of our machine (eg 32- or 64-bit)
# C equivalent: 8 * sizeof(void*)
math(EXPR CMAKE_ARCH_BITNESS 8*${CMAKE_SIZEOF_VOID_P})

# For non-multi-configuration generators (eg, make, Eclipse)
# The Visual Studio generator creates a single project with all these
# This first set() guarantees the cache entry exists so that the
# set_property() call below always has something to attach to.
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebInfo, MinSizeRel")
# A non-FORCE cache set() does nothing when the entry already exists. If the
# entry exists but is empty (for example because project() ran before this
# file was included), the "Release" default would never take effect. Apply it
# explicitly, but only for single-configuration generators and only when the
# user has not chosen a build type.
if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebInfo, MinSizeRel" FORCE)
endif()
# Drop-down choices for cmake-gui/ccmake. CMake's configuration is named
# "RelWithDebInfo"; "RelWithDebugInfo" is not a recognised build type.
SET_PROPERTY(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel")


####################################################
## ---------------------------------------------- ##
## - - ##
## - Enable MPI Support - ##
## - - ##
## ---------------------------------------------- ##
####################################################

# Begin configuring MPI options
# enable_mpi_support()
#
# Finds MPI (configuration fails if it is missing), adds the MPI include
# paths for C and C++ to the directory include path, and makes the MPI
# compile flags part of the C/C++ compiler flags.
# Takes no arguments. Being a macro, every variable it sets lands in the
# caller's scope.
macro(enable_mpi_support)

  find_package("MPI" REQUIRED)

  # Add the MPI-specific compiler and linker flags
  # Also, search for #includes in MPI's paths

  # Legacy variables, kept so that any caller reading them keeps working.
  list(APPEND CMAKE_C_COMPILE_FLAGS ${MPI_C_COMPILE_FLAGS})
  list(APPEND CMAKE_C_LINK_FLAGS ${MPI_C_LINK_FLAGS})
  include_directories(${MPI_C_INCLUDE_PATH})

  list(APPEND CMAKE_CXX_COMPILE_FLAGS ${MPI_CXX_COMPILE_FLAGS})
  list(APPEND CMAKE_CXX_LINK_FLAGS ${MPI_CXX_LINK_FLAGS})
  include_directories(${MPI_CXX_INCLUDE_PATH})

  # CMAKE_<LANG>_COMPILE_FLAGS is not a variable CMake consumes, so on their
  # own the appends above never reach the compiler. Propagate the compile
  # flags through CMAKE_<LANG>_FLAGS, which is a space-separated string.
  if(MPI_C_COMPILE_FLAGS)
    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${MPI_C_COMPILE_FLAGS}")
  endif()
  if(MPI_CXX_COMPILE_FLAGS)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}")
  endif()

endmacro(enable_mpi_support)
# Done configuring MPI Options


####################################################
## ---------------------------------------------- ##
## - - ##
## - Enable OpenMP Support - ##
## - - ##
## ---------------------------------------------- ##
####################################################

# Begin configuring OpenMP options
# enable_openmp_support()
#
# Finds OpenMP (configuration fails if it is missing) and adds the OpenMP
# compiler flags to the global C and C++ flags.
# Takes no arguments. Being a macro, the flag variables are modified in the
# caller's scope.
macro(enable_openmp_support)

  find_package("OpenMP" REQUIRED)

  # Add the OpenMP-specific compiler and linker flags.
  # CMAKE_<LANG>_FLAGS is a space-separated string, not a list. Using
  # list(APPEND) would insert a ';' between the existing flags and the OpenMP
  # flag and corrupt the compile command line whenever the flags were already
  # non-empty, so concatenate with a space instead.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")

endmacro(enable_openmp_support)
# Done configuring OpenMP Options


####################################################
## ---------------------------------------------- ##
## - - ##
## - Enable CUDA Support - ##
## - - ##
## ---------------------------------------------- ##
####################################################

# Begin configuring CUDA options
# This is ugly...
# enable_cuda_support()
#
# Declares the user-tunable CUDA cache options, finds the CUDA toolkit
# (configuration fails if it is missing) and assembles CUDA_NVCC_FLAGS from
# the selected options: optional host compiler, compute capability,
# fast-math and verbose PTX output.
# Takes no arguments. Being a macro, everything it sets (including the nested
# set_compute_capability macro) is visible to the caller afterwards.
macro(enable_cuda_support)

  # Make these options visible in the default (non-advanced) CMake view:
  # mark_as_advanced(CLEAR ...) removes the "advanced" mark.
  mark_as_advanced(CLEAR CUDA_BUILD_CUBIN)
  mark_as_advanced(CLEAR CUDA_SDK_ROOT_DIR)
  mark_as_advanced(CLEAR CUDA_TOOLKIT_ROOT_DIR)
  mark_as_advanced(CLEAR CUDA_VERBOSE_BUILD)
  mark_as_advanced(CLEAR CUDA_FAST_MATH)
  mark_as_advanced(CLEAR CUDA_USE_CUSTOM_COMPILER)
  mark_as_advanced(CLEAR CUDA_VERBOSE_PTX)
  mark_as_advanced(CLEAR CUDA_DEVICE_VERSION)

  # select Compute Capability
  # This needs to be manually updated when devices with new CCs come out
  set(CUDA_DEVICE_VERSION "20" CACHE STRING "CUDA Device Version")
  set_property(CACHE CUDA_DEVICE_VERSION PROPERTY STRINGS "10" "11" "12" "13" "20" "21" "30" "35")

  # Enable fast-math for CUDA (_not_ GCC)
  set(CUDA_FAST_MATH TRUE CACHE BOOL "Use Fast Math Operations")

  # Tell nvcc to use a separate compiler for non-CUDA code.
  # This is useful if you need to use an older version of GCC than comes by default
  set(CUDA_USE_CUSTOM_COMPILER FALSE CACHE BOOL "Use Custom Compiler")
  set(CUDA_CUSTOM_COMPILER "" CACHE STRING "Custom C++ Compiler for CUDA If Needed")

  # Shows register usage, etc
  set(CUDA_VERBOSE_PTX TRUE CACHE BOOL "Show Verbose Kernel Info During Compilation")


  # Let's get going...
  find_package("CUDA" REQUIRED)

  # Frequently used in the examples
  cuda_include_directories(${CUDA_SDK_ROOT_DIR}/common/inc)
  cuda_include_directories(${CUDA_SDK_ROOT_DIR}/../shared/inc)

  # Candidate library directories inside the CUDA SDK. Nothing in this macro
  # reads the variable; it is left defined for callers.
  set(CUDA_SDK_LIB_DIR ${CUDA_SDK_ROOT_DIR}/common/lib
      ${CUDA_SDK_ROOT_DIR}/lib ${CUDA_SDK_ROOT_DIR}/../shared/lib)

  # Set custom compiler flags
  # NOTE(review): FORCE overwrites whatever CUDA_NVCC_FLAGS value is stored in
  # the cache, including one supplied by the user -- confirm this reset is
  # intentional.
  set(CUDA_NVCC_FLAGS "" CACHE STRING "" FORCE)

  if(CUDA_USE_CUSTOM_COMPILER)
    mark_as_advanced(CLEAR CUDA_CUSTOM_COMPILER)
    if(NOT "${CUDA_CUSTOM_COMPILER}" STREQUAL "")
      list(APPEND CUDA_NVCC_FLAGS "-ccbin=${CUDA_CUSTOM_COMPILER}")
    else()
      # An empty path would hand nvcc a bare "-ccbin=" and fail at build
      # time; skip the flag and tell the user what is missing instead.
      message(WARNING "CUDA_USE_CUSTOM_COMPILER is ON but CUDA_CUSTOM_COMPILER is empty; not passing -ccbin to nvcc.")
    endif()
  else()
    mark_as_advanced(FORCE CUDA_CUSTOM_COMPILER)
  endif()

  # Macro for setting the Compute Capability
  # cc: two-digit compute capability such as 20 or 35. Adds one -gencode
  # entry with code=sm_<cc> and one with code=compute_<cc>.
  macro(set_compute_capability cc)
    list(APPEND CUDA_NVCC_FLAGS "-gencode=arch=compute_${cc},code=sm_${cc}")
    list(APPEND CUDA_NVCC_FLAGS "-gencode=arch=compute_${cc},code=compute_${cc}")
  endmacro(set_compute_capability)

  # Tell nvcc to compile for the selected Compute Capability
  # This can also be called from the main CMakeLists.txt to enable
  # support for additional CCs
  set_compute_capability(${CUDA_DEVICE_VERSION})

  # Enable fast-math if selected
  if(CUDA_FAST_MATH)
    list(APPEND CUDA_NVCC_FLAGS "-use_fast_math")
  endif()

  # Enable verbose compile if selected
  if(CUDA_VERBOSE_PTX)
    list(APPEND CUDA_NVCC_FLAGS "--ptxas-options=-v")
  endif()
endmacro(enable_cuda_support)
# Done configuring CUDA options
Loading

0 comments on commit e7e8840

Please sign in to comment.