Give a first attack on parallel RCM algorithm implementation.

spikegpu · Jun 12, 2014 · e7e8840 · e7e8840
1 parent 5743cb9
commit e7e8840
Show file tree

Hide file tree

Showing 12 changed files with 2,224 additions and 0 deletions.
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -0,0 +1,37 @@
+# Top-level build script for the "rcm" project: pulls in the SBEL helper
+# macros, configures CUDA through them, and builds the driver_um and
+# testing executables.
+cmake_minimum_required(VERSION 2.8)
+project(rcm)
+include(cmake/SBELUtils.cmake)
+
+# Place every built executable under <build>/bin.
+set(EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin)
+
+# Macro from cmake/SBELUtils.cmake: finds CUDA and fills CUDA_NVCC_FLAGS.
+enable_cuda_support()
+message(STATUS "Cuda version: ${CUDA_VERSION}")
+
+set(RCM_HEADERS
+  rcm/common.h
+  rcm/rcm_um.h
+  rcm/exception.h
+  rcm/timer.h
+  )
+
+set(RCM_CUHEADERS
+  rcm/device/kernels.cuh
+  )
+
+set(MMIO_FILES
+  mm_io/mm_io.h
+  mm_io/mm_io.c
+  )
+
+# IDE folders. These have to expand the RCM_* lists defined above; no
+# MC64_* variables are set anywhere in this build, so referencing them
+# would expand to nothing and leave the groups empty.
+source_group("Headers" FILES ${RCM_HEADERS})
+source_group("CUDA Headers" FILES ${RCM_CUHEADERS})
+source_group("MM_IO" FILES ${MMIO_FILES})
+
+# Sources include project headers relative to the project root
+# (rcm/..., mm_io/...). PROJECT_SOURCE_DIR equals CMAKE_SOURCE_DIR for a
+# top-level build and stays correct if this project is ever nested.
+include_directories(
+    ${PROJECT_SOURCE_DIR}
+    )
+
+# Both executables are only built with CUDA 6.0 or newer
+# (presumably because the *_um sources need unified memory - TODO confirm).
+# CUDA_VERSION is quoted so an empty value cannot break the if() syntax.
+if("${CUDA_VERSION}" VERSION_LESS "6.0")
+  message(WARNING "CUDA ${CUDA_VERSION} is older than 6.0: driver_um and testing will not be built")
+else()
+  cuda_add_executable(driver_um driver_um.cu ${RCM_HEADERS} ${RCM_CUHEADERS} ${MMIO_FILES})
+  cuda_add_executable(testing testing.cu ${RCM_HEADERS} ${RCM_CUHEADERS} ${MMIO_FILES})
+endif()
diff --git a/SBELUtils.cmake b/SBELUtils.cmake
@@ -0,0 +1,166 @@
+####################################################
+##   Only modify if you know what you're doing.   ##
+####################################################
+
+
+# Helps Eclipse/CDT find our include directories
+set(CMAKE_VERBOSE_MAKEFILE on)
+
+# Detect the bitness of our machine (eg 32- or 64-bit), i.e. 8*sizeof(void*).
+# CMAKE_SIZEOF_VOID_P is empty until a compiled language has been enabled,
+# so skip the computation in that case instead of failing on "8*".
+if(CMAKE_SIZEOF_VOID_P)
+		math(EXPR CMAKE_ARCH_BITNESS "8*${CMAKE_SIZEOF_VOID_P}")
+endif()
+
+# Default build type for single-configuration generators (eg, make, Eclipse).
+# Multi-configuration generators (eg, Visual Studio) carry all configurations
+# in one project and are left untouched.
+#
+# When this file is included after project(), CMAKE_BUILD_TYPE usually already
+# exists in the cache as an empty string, and a plain set(... CACHE ...) would
+# then be ignored. So the default is forced, but only while the user has not
+# chosen a build type.
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+		set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebInfo, MinSizeRel" FORCE)
+endif()
+
+# Choices offered by cmake-gui/ccmake. The names must match CMake's built-in
+# configurations exactly; the optimized-with-debug-info one is "RelWithDebInfo".
+set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel")
+
+
+####################################################
+## ---------------------------------------------- ##
+## -                                            - ##
+## -            Enable MPI Support              - ##
+## -                                            - ##
+## ---------------------------------------------- ##
+####################################################
+
+# Begin configuring MPI options
+#
+# enable_mpi_support()
+#
+# Locates MPI (configuration fails if it is not found), adds the MPI C and
+# C++ include paths to the directory include path, and appends the MPI
+# compile/link flags to CMAKE_<LANG>_COMPILE_FLAGS / CMAKE_<LANG>_LINK_FLAGS.
+# Written as a macro so that those variables are set in the caller's scope.
+#
+# NOTE(review): CMAKE_<LANG>_COMPILE_FLAGS does not look like a variable that
+# CMake itself consumes - confirm the compile flags really reach the compiler.
+macro(enable_mpi_support)
+
+		find_package(MPI REQUIRED)
+
+		# Search for #includes in MPI's paths (C first, then C++)
+		include_directories(${MPI_C_INCLUDE_PATH})
+		include_directories(${MPI_CXX_INCLUDE_PATH})
+
+		# MPI-specific compiler flags
+		list(APPEND CMAKE_C_COMPILE_FLAGS ${MPI_C_COMPILE_FLAGS})
+		list(APPEND CMAKE_CXX_COMPILE_FLAGS ${MPI_CXX_COMPILE_FLAGS})
+
+		# MPI-specific linker flags
+		list(APPEND CMAKE_C_LINK_FLAGS ${MPI_C_LINK_FLAGS})
+		list(APPEND CMAKE_CXX_LINK_FLAGS ${MPI_CXX_LINK_FLAGS})
+
+endmacro()
+# Done configuring MPI Options
+
+
+####################################################
+## ---------------------------------------------- ##
+## -                                            - ##
+## -            Enable OpenMP Support           - ##
+## -                                            - ##
+## ---------------------------------------------- ##
+####################################################
+
+# Begin configuring OpenMP options
+#
+# enable_openmp_support()
+#
+# Locates OpenMP (configuration fails if it is not found) and adds the
+# OpenMP flags to the C and C++ compiler flags. Written as a macro so that
+# CMAKE_C_FLAGS / CMAKE_CXX_FLAGS are modified in the caller's scope.
+macro(enable_openmp_support)
+
+		find_package(OpenMP REQUIRED)
+
+		# CMAKE_<LANG>_FLAGS are space-separated command-line strings, not
+		# CMake lists. list(APPEND) would join with ';' and corrupt the
+		# compiler command line whenever the variable is already non-empty,
+		# so the OpenMP flags are concatenated with a space instead.
+		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+		set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+
+endmacro()
+# Done configuring OpenMP Options
+
+
+####################################################
+## ---------------------------------------------- ##
+## -                                            - ##
+## -            Enable CUDA Support             - ##
+## -                                            - ##
+## ---------------------------------------------- ##
+####################################################
+
+# Begin configuring CUDA options
+#
+# enable_cuda_support()
+#
+# Declares the user-facing CUDA cache options (device version, fast math,
+# custom host compiler, verbose PTX), locates CUDA (configuration fails if
+# it is not found), registers the SDK include directories with nvcc and
+# rebuilds CUDA_NVCC_FLAGS from those options. It also defines the helper
+# macro set_compute_capability(cc). Written as a macro, so everything it
+# sets lands in the caller's scope.
+macro(enable_cuda_support)
+
+		# Clear the "advanced" flag on the CUDA-related cache entries below,
+		# i.e. list them in the default (non-advanced) view of the CMake GUIs.
+		mark_as_advanced(CLEAR
+				CUDA_BUILD_CUBIN
+				CUDA_SDK_ROOT_DIR
+				CUDA_TOOLKIT_ROOT_DIR
+				CUDA_VERBOSE_BUILD
+				CUDA_FAST_MATH
+				CUDA_USE_CUSTOM_COMPILER
+				CUDA_VERBOSE_PTX
+				CUDA_DEVICE_VERSION)
+
+		# Compute Capability to build for, written without the dot.
+		# The list of choices has to be extended by hand for newer devices.
+		set(CUDA_DEVICE_VERSION "20" CACHE STRING "CUDA Device Version")
+		set_property(CACHE CUDA_DEVICE_VERSION PROPERTY STRINGS
+				10 11 12 13 20 21 30 35)
+
+		# Fast-math for CUDA code only (_not_ for the host compiler)
+		set(CUDA_FAST_MATH TRUE CACHE BOOL "Use Fast Math Operations")
+
+		# Optional separate host compiler for nvcc (handed over via -ccbin
+		# further down), e.g. when an older GCC than the default is needed.
+		set(CUDA_USE_CUSTOM_COMPILER FALSE CACHE BOOL "Use Custom Compiler")
+		set(CUDA_CUSTOM_COMPILER "" CACHE STRING "Custom C++ Compiler for CUDA If Needed")
+
+		# Shows register usage, etc. during kernel compilation
+		set(CUDA_VERBOSE_PTX TRUE CACHE BOOL "Show Verbose Kernel Info During Compilation")
+
+		# Locate CUDA; REQUIRED stops the configuration if it is missing.
+		find_package(CUDA REQUIRED)
+
+		# SDK headers that are frequently used in the examples
+		cuda_include_directories(
+				${CUDA_SDK_ROOT_DIR}/common/inc
+				${CUDA_SDK_ROOT_DIR}/../shared/inc)
+
+		# Candidate SDK library directories. Nothing in this macro reads the
+		# variable any more (the shrutil/cutil lookups were retired).
+		set(CUDA_SDK_LIB_DIR
+				${CUDA_SDK_ROOT_DIR}/common/lib
+				${CUDA_SDK_ROOT_DIR}/lib
+				${CUDA_SDK_ROOT_DIR}/../shared/lib)
+
+		# Start from an empty nvcc flag list on every configure run
+		set(CUDA_NVCC_FLAGS "" CACHE STRING "" FORCE)
+
+		# CUDA_CUSTOM_COMPILER is only shown (and only used) when the custom
+		# compiler option is switched on.
+		if(NOT CUDA_USE_CUSTOM_COMPILER)
+				mark_as_advanced(FORCE CUDA_CUSTOM_COMPILER)
+		else()
+				mark_as_advanced(CLEAR CUDA_CUSTOM_COMPILER)
+				list(APPEND CUDA_NVCC_FLAGS "-ccbin=${CUDA_CUSTOM_COMPILER}")
+		endif()
+
+		# set_compute_capability(cc)
+		# Appends two -gencode entries for Compute Capability <cc> to
+		# CUDA_NVCC_FLAGS: one with code=sm_<cc>, one with code=compute_<cc>.
+		macro(set_compute_capability cc)
+				list(APPEND CUDA_NVCC_FLAGS
+						"-gencode=arch=compute_${cc},code=sm_${cc}"
+						"-gencode=arch=compute_${cc},code=compute_${cc}")
+		endmacro()
+
+		# Compile for the selected Compute Capability. The helper can be
+		# called again from the main CMakeLists.txt to add further CCs.
+		set_compute_capability(${CUDA_DEVICE_VERSION})
+
+		# Fast-math, if selected
+		if(CUDA_FAST_MATH)
+				list(APPEND CUDA_NVCC_FLAGS "-use_fast_math")
+		endif()
+
+		# Verbose ptxas output, if selected
+		if(CUDA_VERBOSE_PTX)
+				list(APPEND CUDA_NVCC_FLAGS "--ptxas-options=-v")
+		endif()
+endmacro()
+# Done configuring CUDA options
diff --git a/cmake/SBELUtils.cmake b/cmake/SBELUtils.cmake
@@ -0,0 +1,166 @@
+####################################################
+##   Only modify if you know what you're doing.   ##
+####################################################
+
+
+# Helps Eclipse/CDT find our include directories
+set(CMAKE_VERBOSE_MAKEFILE on)
+
+# Detect the bitness of our machine (eg 32- or 64-bit), i.e. 8*sizeof(void*).
+# CMAKE_SIZEOF_VOID_P is empty until a compiled language has been enabled,
+# so skip the computation in that case instead of failing on "8*".
+if(CMAKE_SIZEOF_VOID_P)
+		math(EXPR CMAKE_ARCH_BITNESS "8*${CMAKE_SIZEOF_VOID_P}")
+endif()
+
+# Default build type for single-configuration generators (eg, make, Eclipse).
+# Multi-configuration generators (eg, Visual Studio) carry all configurations
+# in one project and are left untouched.
+#
+# When this file is included after project(), CMAKE_BUILD_TYPE usually already
+# exists in the cache as an empty string, and a plain set(... CACHE ...) would
+# then be ignored. So the default is forced, but only while the user has not
+# chosen a build type.
+if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
+		set(CMAKE_BUILD_TYPE "Release" CACHE STRING "For single-configuration generators (e.g. make) set the type of build: Release, Debug, RelWithDebInfo, MinSizeRel" FORCE)
+endif()
+
+# Choices offered by cmake-gui/ccmake. The names must match CMake's built-in
+# configurations exactly; the optimized-with-debug-info one is "RelWithDebInfo".
+set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Release" "Debug" "RelWithDebInfo" "MinSizeRel")
+
+
+####################################################
+## ---------------------------------------------- ##
+## -                                            - ##
+## -            Enable MPI Support              - ##
+## -                                            - ##
+## ---------------------------------------------- ##
+####################################################
+
+# Begin configuring MPI options
+#
+# enable_mpi_support()
+#
+# Locates MPI (configuration fails if it is not found), adds the MPI C and
+# C++ include paths to the directory include path, and appends the MPI
+# compile/link flags to CMAKE_<LANG>_COMPILE_FLAGS / CMAKE_<LANG>_LINK_FLAGS.
+# Written as a macro so that those variables are set in the caller's scope.
+#
+# NOTE(review): CMAKE_<LANG>_COMPILE_FLAGS does not look like a variable that
+# CMake itself consumes - confirm the compile flags really reach the compiler.
+macro(enable_mpi_support)
+
+		find_package(MPI REQUIRED)
+
+		# Search for #includes in MPI's paths (C first, then C++)
+		include_directories(${MPI_C_INCLUDE_PATH})
+		include_directories(${MPI_CXX_INCLUDE_PATH})
+
+		# MPI-specific compiler flags
+		list(APPEND CMAKE_C_COMPILE_FLAGS ${MPI_C_COMPILE_FLAGS})
+		list(APPEND CMAKE_CXX_COMPILE_FLAGS ${MPI_CXX_COMPILE_FLAGS})
+
+		# MPI-specific linker flags
+		list(APPEND CMAKE_C_LINK_FLAGS ${MPI_C_LINK_FLAGS})
+		list(APPEND CMAKE_CXX_LINK_FLAGS ${MPI_CXX_LINK_FLAGS})
+
+endmacro()
+# Done configuring MPI Options
+
+
+####################################################
+## ---------------------------------------------- ##
+## -                                            - ##
+## -            Enable OpenMP Support           - ##
+## -                                            - ##
+## ---------------------------------------------- ##
+####################################################
+
+# Begin configuring OpenMP options
+#
+# enable_openmp_support()
+#
+# Locates OpenMP (configuration fails if it is not found) and adds the
+# OpenMP flags to the C and C++ compiler flags. Written as a macro so that
+# CMAKE_C_FLAGS / CMAKE_CXX_FLAGS are modified in the caller's scope.
+macro(enable_openmp_support)
+
+		find_package(OpenMP REQUIRED)
+
+		# CMAKE_<LANG>_FLAGS are space-separated command-line strings, not
+		# CMake lists. list(APPEND) would join with ';' and corrupt the
+		# compiler command line whenever the variable is already non-empty,
+		# so the OpenMP flags are concatenated with a space instead.
+		set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+		set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+
+endmacro()
+# Done configuring OpenMP Options
+
+
+####################################################
+## ---------------------------------------------- ##
+## -                                            - ##
+## -            Enable CUDA Support             - ##
+## -                                            - ##
+## ---------------------------------------------- ##
+####################################################
+
+# Begin configuring CUDA options
+#
+# enable_cuda_support()
+#
+# Declares the user-facing CUDA cache options (device version, fast math,
+# custom host compiler, verbose PTX), locates CUDA (configuration fails if
+# it is not found), registers the SDK include directories with nvcc and
+# rebuilds CUDA_NVCC_FLAGS from those options. It also defines the helper
+# macro set_compute_capability(cc). Written as a macro, so everything it
+# sets lands in the caller's scope.
+macro(enable_cuda_support)
+
+		# Clear the "advanced" flag on the CUDA-related cache entries below,
+		# i.e. list them in the default (non-advanced) view of the CMake GUIs.
+		mark_as_advanced(CLEAR
+				CUDA_BUILD_CUBIN
+				CUDA_SDK_ROOT_DIR
+				CUDA_TOOLKIT_ROOT_DIR
+				CUDA_VERBOSE_BUILD
+				CUDA_FAST_MATH
+				CUDA_USE_CUSTOM_COMPILER
+				CUDA_VERBOSE_PTX
+				CUDA_DEVICE_VERSION)
+
+		# Compute Capability to build for, written without the dot.
+		# The list of choices has to be extended by hand for newer devices.
+		set(CUDA_DEVICE_VERSION "20" CACHE STRING "CUDA Device Version")
+		set_property(CACHE CUDA_DEVICE_VERSION PROPERTY STRINGS
+				10 11 12 13 20 21 30 35)
+
+		# Fast-math for CUDA code only (_not_ for the host compiler)
+		set(CUDA_FAST_MATH TRUE CACHE BOOL "Use Fast Math Operations")
+
+		# Optional separate host compiler for nvcc (handed over via -ccbin
+		# further down), e.g. when an older GCC than the default is needed.
+		set(CUDA_USE_CUSTOM_COMPILER FALSE CACHE BOOL "Use Custom Compiler")
+		set(CUDA_CUSTOM_COMPILER "" CACHE STRING "Custom C++ Compiler for CUDA If Needed")
+
+		# Shows register usage, etc. during kernel compilation
+		set(CUDA_VERBOSE_PTX TRUE CACHE BOOL "Show Verbose Kernel Info During Compilation")
+
+		# Locate CUDA; REQUIRED stops the configuration if it is missing.
+		find_package(CUDA REQUIRED)
+
+		# SDK headers that are frequently used in the examples
+		cuda_include_directories(
+				${CUDA_SDK_ROOT_DIR}/common/inc
+				${CUDA_SDK_ROOT_DIR}/../shared/inc)
+
+		# Candidate SDK library directories. Nothing in this macro reads the
+		# variable any more (the shrutil/cutil lookups were retired).
+		set(CUDA_SDK_LIB_DIR
+				${CUDA_SDK_ROOT_DIR}/common/lib
+				${CUDA_SDK_ROOT_DIR}/lib
+				${CUDA_SDK_ROOT_DIR}/../shared/lib)
+
+		# Start from an empty nvcc flag list on every configure run
+		set(CUDA_NVCC_FLAGS "" CACHE STRING "" FORCE)
+
+		# CUDA_CUSTOM_COMPILER is only shown (and only used) when the custom
+		# compiler option is switched on.
+		if(NOT CUDA_USE_CUSTOM_COMPILER)
+				mark_as_advanced(FORCE CUDA_CUSTOM_COMPILER)
+		else()
+				mark_as_advanced(CLEAR CUDA_CUSTOM_COMPILER)
+				list(APPEND CUDA_NVCC_FLAGS "-ccbin=${CUDA_CUSTOM_COMPILER}")
+		endif()
+
+		# set_compute_capability(cc)
+		# Appends two -gencode entries for Compute Capability <cc> to
+		# CUDA_NVCC_FLAGS: one with code=sm_<cc>, one with code=compute_<cc>.
+		macro(set_compute_capability cc)
+				list(APPEND CUDA_NVCC_FLAGS
+						"-gencode=arch=compute_${cc},code=sm_${cc}"
+						"-gencode=arch=compute_${cc},code=compute_${cc}")
+		endmacro()
+
+		# Compile for the selected Compute Capability. The helper can be
+		# called again from the main CMakeLists.txt to add further CCs.
+		set_compute_capability(${CUDA_DEVICE_VERSION})
+
+		# Fast-math, if selected
+		if(CUDA_FAST_MATH)
+				list(APPEND CUDA_NVCC_FLAGS "-use_fast_math")
+		endif()
+
+		# Verbose ptxas output, if selected
+		if(CUDA_VERBOSE_PTX)
+				list(APPEND CUDA_NVCC_FLAGS "--ptxas-options=-v")
+		endif()
+endmacro()
+# Done configuring CUDA options