From fd996417ade6777eedb57970134c5afe488eeb20 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matthias=20M=C3=B6ller?= <m_moeller@live.de>
Date: Sat, 7 Sep 2024 22:23:14 +0200
Subject: [PATCH] cuda_add_library

---
 dlib/CMakeLists.txt                           | 157 ++++++++++++++++--
 dlib/cmake_utils/findCUDNN.cmake              |  76 ---------
 dlib/cmake_utils/test_for_cuda/CMakeLists.txt |   5 +-
 .../cmake_utils/test_for_cudnn/CMakeLists.txt |   5 +-
 4 files changed, 153 insertions(+), 90 deletions(-)
 delete mode 100644 dlib/cmake_utils/findCUDNN.cmake

diff --git a/dlib/CMakeLists.txt b/dlib/CMakeLists.txt
index e7bfd29c74..491b78f57e 100644
--- a/dlib/CMakeLists.txt
+++ b/dlib/CMakeLists.txt
@@ -649,13 +649,144 @@ if (NOT TARGET dlib)
 
 
       if (DLIB_USE_CUDA)
-         find_package(CUDAToolkit 9.1)
-         set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake_utils)
-         find_package(CUDNN)
-         find_package(OpenMP)
-         set(openmp_libraries ${OpenMP_CXX_FLAGS}) 
+         find_package(CUDAToolkit QUIET)
 
-         if (CUDAToolkit_FOUND AND CUDNN_FOUND AND OPENMP_FOUND)
+         if (CUDA_VERSION VERSION_GREATER 9.1 AND CMAKE_VERSION VERSION_LESS 3.12.2)
+            # Work around a bug in CMake older than 3.12.2: with CUDA newer than 9.1 the cuBLAS list can contain a bogus -NOTFOUND entry, so drop it.
+            list(REMOVE_ITEM CUDA_CUBLAS_LIBRARIES "CUDA_cublas_device_LIBRARY-NOTFOUND")
+         endif()
+
+
+         if (CUDAToolkit_FOUND AND MSVC AND NOT TARGET CUDA::cublas AND "${CMAKE_SIZEOF_VOID_P}" EQUAL "4")
+            message(WARNING "You have CUDA installed, but we can't use it unless you put visual studio in 64bit mode.")
+            set(CUDAToolkit_FOUND 0) # every later check tests CUDAToolkit_FOUND; the old CUDA_FOUND variable is no longer read
+         endif()
+
+         if (CUDAToolkit_FOUND)
+
+            # There is some bug in cmake that causes it to mess up the
+            # -std=c++11 option if you let it propagate it to nvcc in some
+            # cases.  So instead we disable this and manually include
+            # things from CMAKE_CXX_FLAGS in the CUDA_NVCC_FLAGS list below.
+            if (APPLE)
+               set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+               # Grab all the -D flags from CMAKE_CXX_FLAGS so we can pass them
+               # to nvcc.
+               string(REGEX MATCHALL "-D[^ ]*" FLAGS_FOR_NVCC "${CMAKE_CXX_FLAGS}")
+
+               # Check if we are being built as part of a pybind11 module. 
+               if (COMMAND pybind11_add_module)
+                  # Don't export unnecessary symbols.
+                  list(APPEND FLAGS_FOR_NVCC "-Xcompiler=-fvisibility=hidden")
+               endif()
+            endif()
+
+            set(CUDA_HOST_COMPILATION_CPP ON)
+            string(REPLACE "," ";" DLIB_CUDA_COMPUTE_CAPABILITIES "${DLIB_USE_CUDA_COMPUTE_CAPABILITIES}") # quoted: string(REPLACE) concatenates multiple inputs, so an unquoted "50;60" would become "5060"
+            foreach(CAP ${DLIB_CUDA_COMPUTE_CAPABILITIES})
+                list(APPEND CUDA_NVCC_FLAGS "-gencode arch=compute_${CAP},code=[sm_${CAP},compute_${CAP}]")
+            endforeach()
+            # Note that we add __STRICT_ANSI__ to avoid freaking out nvcc with gcc specific
+            # magic in the standard C++ header files (since nvcc uses gcc headers on linux).
+            list(APPEND CUDA_NVCC_FLAGS "-D__STRICT_ANSI__;-D_MWAITXINTRIN_H_INCLUDED;-D_FORCE_INLINES;${FLAGS_FOR_NVCC}")
+            list(APPEND CUDA_NVCC_FLAGS ${active_preprocessor_switches})
+            if (NOT DLIB_IN_PROJECT_BUILD)
+               LIST(APPEND CUDA_NVCC_FLAGS -DDLIB__CMAKE_GENERATED_A_CONFIG_H_FILE)
+            endif()
+            if (NOT MSVC)
+               list(APPEND CUDA_NVCC_FLAGS "-std=c++14")
+            endif()
+            if (CMAKE_POSITION_INDEPENDENT_CODE)
+               # sometimes this setting isn't propagated to NVCC, which then causes the
+               # compile to fail.  So make sure it's propagated.
+               if (NOT MSVC) # Visual studio doesn't have -fPIC so don't do it in that case.
+                  list(APPEND CUDA_NVCC_FLAGS "-Xcompiler -fPIC")
+               endif()
+            endif()
+
+            include(cmake_utils/test_for_cudnn/find_cudnn.txt)
+
+            include(CheckLanguage)
+            check_language(CUDA)
+            set(cuda_compiler_found OFF)
+            if (CMAKE_CUDA_COMPILER)
+               set(cuda_compiler_found ON)
+            else()
+               message(STATUS " *** Cannot find CUDA compiler. If you are on windows using Visual Studio, make sure to install 'Visual Studio Integration' in the cuda installer.")
+            endif()
+
+            if (cudnn AND cudnn_include AND cuda_compiler_found AND NOT DEFINED cuda_test_compile_worked AND NOT DEFINED cudnn_test_compile_worked)
+               # make sure cuda is really working by doing a test compile
+               enable_language(CUDA)
+               message(STATUS "Building a CUDA test project to see if your compiler is compatible with CUDA...")
+
+               set(CUDA_TEST_CMAKE_FLAGS 
+                  "-DCMAKE_PREFIX_PATH=${CMAKE_PREFIX_PATH}"
+                  "-DCMAKE_INCLUDE_PATH=${CMAKE_INCLUDE_PATH}"
+                  "-DCMAKE_LIBRARY_PATH=${CMAKE_LIBRARY_PATH}")
+
+               if (NOT MSVC) # see https://github.com/davisking/dlib/issues/363
+                  list(APPEND CUDA_TEST_CMAKE_FLAGS "-DCUDA_HOST_COMPILER=${CUDA_HOST_COMPILER}")
+               endif()
+
+               try_compile(cuda_test_compile_worked 
+                  ${PROJECT_BINARY_DIR}/cuda_test_build 
+                  ${PROJECT_SOURCE_DIR}/cmake_utils/test_for_cuda cuda_test
+                  CMAKE_FLAGS ${CUDA_TEST_CMAKE_FLAGS}
+                  OUTPUT_VARIABLE try_compile_output_message
+                  )
+               if (NOT cuda_test_compile_worked)
+                  string(REPLACE "\n" "\n   ***   " try_compile_output_message "${try_compile_output_message}")
+                  message(STATUS "*****************************************************************************************************************")
+                  message(STATUS "*** CUDA was found but your compiler failed to compile a simple CUDA program so dlib isn't going to use CUDA. ")
+                  message(STATUS "*** The output of the failed CUDA test compile is shown below: ")
+                  message(STATUS "*** ")
+                  message(STATUS "***   ${try_compile_output_message}")
+                  message(STATUS "*****************************************************************************************************************")
+               else()
+                  message(STATUS "Building a cuDNN test project to check if you have the right version of cuDNN installed...")
+                  try_compile(cudnn_test_compile_worked 
+                     ${PROJECT_BINARY_DIR}/cudnn_test_build 
+                     ${PROJECT_SOURCE_DIR}/cmake_utils/test_for_cudnn cudnn_test
+                     CMAKE_FLAGS ${CUDA_TEST_CMAKE_FLAGS}
+                     OUTPUT_VARIABLE try_compile_output_message
+                     )
+                  if (NOT cudnn_test_compile_worked)
+                     string(REPLACE "\n" "\n   ***   " try_compile_output_message "${try_compile_output_message}")
+                     message(STATUS "*****************************************************************************************************")
+                     message(STATUS "*** Found cuDNN, but we failed to compile the dlib/cmake_utils/test_for_cudnn project. ")
+                     message(STATUS "*** You either have an unsupported version of cuDNN or something is wrong with your cuDNN install.")
+                     message(STATUS "*** Since a functional cuDNN is not found DLIB WILL NOT USE CUDA. ")
+                     message(STATUS "*** The output of the failed test_for_cudnn build is: ")
+                     message(STATUS "*** ")
+                     message(STATUS "***   ${try_compile_output_message}")
+                     message(STATUS "*****************************************************************************************************")
+                  endif()
+               endif()
+            endif()
+            # Also find OpenMP since cuSOLVER needs it.  Importantly, we only
+            # look for one to link to if our use of BLAS, specifically the
+            # Intel MKL, hasn't already decided what to use.  This is because
+            # it makes the MKL bug out if you link to another openmp lib other
+            # than Intel's when you use the MKL. I'm also not really sure when
+            # explicit linking to openmp became unnecessary, but for
+            # sufficiently older versions of cuda it was needed.  Then in
+            # versions of cmake newer than 3.11 linking to openmp started to
+            # mess up the switches passed to nvcc, so you can't just leave
+            # these "try to link to openmp" statements here going forward.  Fun
+            # times.
+            if (CUDAToolkit_VERSION VERSION_LESS "9.1" AND NOT openmp_libraries AND NOT MSVC AND NOT XCODE AND NOT APPLE) # FindCUDAToolkit sets CUDAToolkit_VERSION, not CUDA_VERSION
+               find_package(OpenMP)
+               if (OPENMP_FOUND)
+                  set(openmp_libraries ${OpenMP_CXX_FLAGS})
+               else()
+                  message(STATUS "*** Didn't find OpenMP, which is required to use CUDA. ***")
+                  set(CUDAToolkit_FOUND 0) # makes the CUDAToolkit_FOUND check that follows disable CUDA
+               endif()
+            endif()
+         endif()
+
+         if (CUDAToolkit_FOUND AND cudnn AND cuda_test_compile_worked AND cudnn_test_compile_worked AND cudnn_include)
             set(source_files ${source_files} 
                cuda/cuda_dlib.cu 
                cuda/cudnn_dlibapi.cpp
@@ -665,19 +796,22 @@ if (NOT TARGET dlib)
                cuda/cuda_data_ptr.cpp
                cuda/gpu_data.cpp
                )
+            list (APPEND dlib_needed_private_libraries CUDA::toolkit)
             list (APPEND dlib_needed_private_libraries CUDA::cublas)
-            list (APPEND dlib_needed_private_libraries ${CUDNN_LIBRARY_PATH})
+            list (APPEND dlib_needed_private_libraries ${cudnn})
             list (APPEND dlib_needed_private_libraries CUDA::curand)
             list (APPEND dlib_needed_private_libraries CUDA::cusolver)
             list (APPEND dlib_needed_private_libraries CUDA::cudart)
-            list (APPEND dlib_needed_private_libraries ${openmp_libraries})
+            if(openmp_libraries)
+               list (APPEND dlib_needed_private_libraries ${openmp_libraries})
+            endif()
 
-            include_directories(${CUDAToolkit_INCLUDE_DIRS} ${CUDNN_INCLUDE_PATH})
+            include_directories(${cudnn_include})
             message(STATUS "Enabling CUDA support for dlib.  DLIB WILL USE CUDA, compute capabilities: ${DLIB_CUDA_COMPUTE_CAPABILITIES}")
          else()
             set(DLIB_USE_CUDA OFF CACHE BOOL ${DLIB_USE_BLAS_STR} FORCE )
             toggle_preprocessor_switch(DLIB_USE_CUDA)
-            if (NOT CUDA_FOUND)
+            if (NOT CUDAToolkit_FOUND)
                message(STATUS "DID NOT FIND CUDA")
             endif()
             message(STATUS "Disabling CUDA support for dlib.  DLIB WILL NOT USE CUDA")
@@ -762,6 +896,9 @@ if (NOT TARGET dlib)
       target_compile_options(dlib PRIVATE "-DDLIB_CHECK_FOR_VERSION_MISMATCH=${DLIB_CHECK_FOR_VERSION_MISMATCH}")
    endif()
 
+   if (DLIB_USE_CUDA)
+      set_target_properties(dlib PROPERTIES CUDA_ARCHITECTURES "${DLIB_CUDA_COMPUTE_CAPABILITIES}") # quoted so a multi-entry list stays a single property value
+   endif()
 
    # Allow the unit tests to ask us to compile the all/source.cpp file just to make sure it compiles.
    if (DLIB_TEST_COMPILE_ALL_SOURCE_CPP)
diff --git a/dlib/cmake_utils/findCUDNN.cmake b/dlib/cmake_utils/findCUDNN.cmake
deleted file mode 100644
index 0c900fcd4e..0000000000
--- a/dlib/cmake_utils/findCUDNN.cmake
+++ /dev/null
@@ -1,76 +0,0 @@
-# Find the CUDNN libraries
-#
-# The following variables are optionally searched for defaults
-#  CUDNN_ROOT: Base directory where CUDNN is found
-#  CUDNN_INCLUDE_DIR: Directory where CUDNN header is searched for
-#  CUDNN_LIBRARY: Directory where CUDNN library is searched for
-#  CUDNN_STATIC: Are we looking for a static library? (default: no)
-#
-# The following are set after configuration is done:
-#  CUDNN_FOUND
-#  CUDNN_INCLUDE_PATH
-#  CUDNN_LIBRARY_PATH
-#
-
-include(FindPackageHandleStandardArgs)
-
-set(CUDNN_ROOT $ENV{CUDNN_ROOT_DIR} CACHE PATH "Folder containing NVIDIA cuDNN")
-if (DEFINED $ENV{CUDNN_ROOT_DIR})
-  message(WARNING "CUDNN_ROOT_DIR is deprecated. Please set CUDNN_ROOT instead.")
-endif()
-list(APPEND CUDNN_ROOT $ENV{CUDNN_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR})
-
-# Compatible layer for CMake <3.12. CUDNN_ROOT will be accounted in for searching paths and libraries for CMake >=3.12.
-list(APPEND CMAKE_PREFIX_PATH ${CUDNN_ROOT})
-
-set(CUDNN_INCLUDE_DIR $ENV{CUDNN_INCLUDE_DIR} CACHE PATH "Folder containing NVIDIA cuDNN header files")
-
-find_path(CUDNN_INCLUDE_PATH cudnn.h
-  HINTS ${CUDNN_INCLUDE_DIR}
-  PATH_SUFFIXES cuda/include cuda include)
-
-option(CUDNN_STATIC "Look for static CUDNN" OFF)
-if (CUDNN_STATIC)
-  set(CUDNN_LIBNAME "libcudnn_static.a")
-else()
-  set(CUDNN_LIBNAME "cudnn")
-endif()
-
-set(CUDNN_LIBRARY $ENV{CUDNN_LIBRARY} CACHE PATH "Path to the cudnn library file (e.g., libcudnn.so)")
-if (CUDNN_LIBRARY MATCHES ".*cudnn_static.a" AND NOT CUDNN_STATIC)
-  message(WARNING "CUDNN_LIBRARY points to a static library (${CUDNN_LIBRARY}) but CUDNN_STATIC is OFF.")
-endif()
-
-find_library(CUDNN_LIBRARY_PATH ${CUDNN_LIBNAME}
-  PATHS ${CUDNN_LIBRARY}
-  PATH_SUFFIXES lib lib64 cuda/lib cuda/lib64 lib/x64)
-
-find_package_handle_standard_args(CUDNN DEFAULT_MSG CUDNN_LIBRARY_PATH CUDNN_INCLUDE_PATH)
-
-if(CUDNN_FOUND)
-  # Get cuDNN version
-  if(EXISTS ${CUDNN_INCLUDE_PATH}/cudnn_version.h)
-    file(READ ${CUDNN_INCLUDE_PATH}/cudnn_version.h CUDNN_HEADER_CONTENTS)
-  else()
-    file(READ ${CUDNN_INCLUDE_PATH}/cudnn.h CUDNN_HEADER_CONTENTS)
-  endif()
-  string(REGEX MATCH "define CUDNN_MAJOR * +([0-9]+)"
-               CUDNN_VERSION_MAJOR "${CUDNN_HEADER_CONTENTS}")
-  string(REGEX REPLACE "define CUDNN_MAJOR * +([0-9]+)" "\\1"
-               CUDNN_VERSION_MAJOR "${CUDNN_VERSION_MAJOR}")
-  string(REGEX MATCH "define CUDNN_MINOR * +([0-9]+)"
-               CUDNN_VERSION_MINOR "${CUDNN_HEADER_CONTENTS}")
-  string(REGEX REPLACE "define CUDNN_MINOR * +([0-9]+)" "\\1"
-               CUDNN_VERSION_MINOR "${CUDNN_VERSION_MINOR}")
-  string(REGEX MATCH "define CUDNN_PATCHLEVEL * +([0-9]+)"
-               CUDNN_VERSION_PATCH "${CUDNN_HEADER_CONTENTS}")
-  string(REGEX REPLACE "define CUDNN_PATCHLEVEL * +([0-9]+)" "\\1"
-               CUDNN_VERSION_PATCH "${CUDNN_VERSION_PATCH}")
-  # Assemble cuDNN version
-  if(NOT CUDNN_VERSION_MAJOR)
-    set(CUDNN_VERSION "?")
-  else()
-    set(CUDNN_VERSION
-        "${CUDNN_VERSION_MAJOR}.${CUDNN_VERSION_MINOR}.${CUDNN_VERSION_PATCH}")
-  endif()
-endif()
\ No newline at end of file
diff --git a/dlib/cmake_utils/test_for_cuda/CMakeLists.txt b/dlib/cmake_utils/test_for_cuda/CMakeLists.txt
index 7392501f15..f0a0fd0b00 100644
--- a/dlib/cmake_utils/test_for_cuda/CMakeLists.txt
+++ b/dlib/cmake_utils/test_for_cuda/CMakeLists.txt
@@ -7,8 +7,9 @@ add_definitions(-DDLIB_USE_CUDA)
 
 # Override the FindCUDA.cmake setting to avoid duplication of host flags if using a toolchain:
 option(CUDA_PROPAGATE_HOST_FLAGS "Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF)
-find_package(CUDA 7.5 REQUIRED)
+find_package(CUDAToolkit 7.5 REQUIRED)
 set(CUDA_HOST_COMPILATION_CPP ON)
 list(APPEND CUDA_NVCC_FLAGS "-arch=sm_50;-std=c++14;-D__STRICT_ANSI__;-D_MWAITXINTRIN_H_INCLUDED;-D_FORCE_INLINES")
 
-cuda_add_library(cuda_test STATIC cuda_test.cu )
+enable_language(CUDA)
+add_library(cuda_test STATIC cuda_test.cu )
diff --git a/dlib/cmake_utils/test_for_cudnn/CMakeLists.txt b/dlib/cmake_utils/test_for_cudnn/CMakeLists.txt
index 4a0315a327..9bfed91430 100644
--- a/dlib/cmake_utils/test_for_cudnn/CMakeLists.txt
+++ b/dlib/cmake_utils/test_for_cudnn/CMakeLists.txt
@@ -4,7 +4,7 @@ project(cudnn_test)
 
 # Override the FindCUDA.cmake setting to avoid duplication of host flags if using a toolchain:
 option(CUDA_PROPAGATE_HOST_FLAGS "Propage C/CXX_FLAGS and friends to the host compiler via -Xcompile" OFF)
-find_package(CUDA 7.5 REQUIRED)
+find_package(CUDAToolkit 7.5 REQUIRED)
 set(CUDA_HOST_COMPILATION_CPP ON)
 list(APPEND CUDA_NVCC_FLAGS "-arch=sm_50;-std=c++14;-D__STRICT_ANSI__")
 add_definitions(-DDLIB_USE_CUDA)
@@ -13,6 +13,7 @@ include(find_cudnn.txt)
 
 if (cudnn_include AND cudnn)
    include_directories(${cudnn_include})
-   cuda_add_library(cudnn_test STATIC ../../cuda/cudnn_dlibapi.cpp ${cudnn} )
+   enable_language(CUDA)
+   add_library(cudnn_test STATIC ../../cuda/cudnn_dlibapi.cpp ${cudnn} )
    target_compile_features(cudnn_test PUBLIC cxx_std_14)
 endif()