diff --git a/EESSI-install-software.sh b/EESSI-install-software.sh index 4aac3772cf..b61ca7a579 100755 --- a/EESSI-install-software.sh +++ b/EESSI-install-software.sh @@ -189,19 +189,18 @@ pr_diff=$(ls [0-9]*.diff | head -1) # install any additional required scripts # order is important: these are needed to install a full CUDA SDK in host_injections -install_scripts_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^install_scripts.sh$' > /dev/null; echo $?) -if [ ${install_scripts_changed} == '0' ]; then - # for now, this just reinstalls all scripts. Note the most elegant, but works - ${TOPDIR}/install_scripts.sh --prefix ${EESSI_CVMFS_REPO} -fi +# for now, this just reinstalls all scripts. Note the most elegant, but works +${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX} # Install full CUDA SDK in host_injections # Hardcode this for now, see if it works # TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install -${EESSI_CVMFS_REPO}/gpu_support/nvidia/install_cuda_host_injections.sh 12.1.1 +${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula # Install drivers in host_injections -${EESSI_CVMFS_REPO}/gpu_support/nvidia/link_nvidia_host_libraries.sh +# TODO: this is commented out for now, because the script assumes that nvidia-smi is available and works; +# if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software) +# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh # use PR patch file to determine in which easystack files stuff was added for easystack_file in $(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing'); do @@ -245,7 +244,8 @@ done echo ">> Creating/updating Lmod cache..." export LMOD_RC="${EASYBUILD_INSTALLPATH}/.lmod/lmodrc.lua" -if [ ! -f $LMOD_RC ]; then +lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?) +if [ ! -f $LMOD_RC ] || [ ${lmodrc_changed} == '0' ]; then python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH} check_exit_code $? "$LMOD_RC created" "Failed to create $LMOD_RC" fi diff --git a/bot/build.sh b/bot/build.sh index 4af217628e..1622e757e2 100755 --- a/bot/build.sh +++ b/bot/build.sh @@ -176,6 +176,11 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR} declare -a BUILD_STEP_ARGS=() BUILD_STEP_ARGS+=("--save" "${TARBALL_TMP_BUILD_STEP_DIR}") BUILD_STEP_ARGS+=("--storage" "${STORAGE}") +# add options required to handle NVIDIA support +BUILD_STEP_ARGS+=("--nvidia" "all") +if [[ ! -z ${SHARED_FS_PATH} ]]; then + BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections") +fi # prepare arguments to install_software_layer.sh (specific to build step) declare -a INSTALL_SCRIPT_ARGS=() diff --git a/create_lmodrc.py b/create_lmodrc.py index ae65153a20..0e738a530e 100755 --- a/create_lmodrc.py +++ b/create_lmodrc.py @@ -17,6 +17,85 @@ } """ +GPU_LMOD_RC ="""require("strict") +local hook = require("Hook") +local open = io.open + +local function read_file(path) + local file = open(path, "rb") -- r read mode and b binary mode + if not file then return nil end + local content = file:read "*a" -- *a or *all reads the whole file + file:close() + return content +end + +local function cuda_enabled_load_hook(t) + local frameStk = require("FrameStk"):singleton() + local mt = frameStk:mt() + local simpleName = string.match(t.modFullName, "(.-)/") + -- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections. + -- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse + -- to load the CUDA module and print an informative message on how to set up GPU support for EESSI + local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n" + if simpleName == 'CUDA' then + -- get the full host_injections path + local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections') + -- build final path where the CUDA software should be installed + local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild" + local cudaDirExists = isDir(cudaEasyBuildDir) + if not cudaDirExists then + local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI " + advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI " + advice = advice .. "can find it.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYou requested to load ", simpleName, " ", advice) + end + end + -- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker, + -- otherwise, refuse to load the requested module and print error message + local haveGpu = mt:haveProperty(simpleName,"arch","gpu") + if haveGpu then + local arch = os.getenv("EESSI_CPU_FAMILY") or "" + local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt" + local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so" + local cudaDriverExists = isFile(cudaDriverFile) + local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so") + if not (cudaDriverExists or singularityCudaExists) then + local advice = "which relies on the CUDA runtime environment and driver libraries. " + advice = advice .. "In order to be able to use the module, you will need " + advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYou requested to load ", simpleName, " ", advice) + else + -- CUDA driver exists, now we check its version to see if an update is needed + if cudaDriverExists then + local cudaVersion = read_file(cudaVersionFile) + local cudaVersion_req = os.getenv("EESSICUDAVERSION") + -- driver CUDA versions don't give a patch version for CUDA + local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)") + local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)") + local driver_libs_need_update = false + if major < major_req then + driver_libs_need_update = true + elseif major == major_req then + if minor < minor_req then + driver_libs_need_update = true + end + end + if driver_libs_need_update == true then + local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". " + advice = advice .. "Please update your CUDA driver libraries and then " + advice = advice .. "let EESSI know about the update.\\n" + advice = advice .. refer_to_docs + LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice) + end + end + end + end +end + +hook.register("load", cuda_enabled_load_hook) +""" def error(msg): sys.stderr.write("ERROR: %s\n" % msg) @@ -36,6 +115,7 @@ def error(msg): 'dot_lmod': DOT_LMOD, 'prefix': prefix, } +lmodrc_txt += '\n' + GPU_LMOD_RC try: os.makedirs(os.path.dirname(lmodrc_path), exist_ok=True) with open(lmodrc_path, 'w') as fp: diff --git a/create_tarball.sh b/create_tarball.sh index 8510caebf1..faaa9fda6f 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -40,11 +40,23 @@ echo ">> Collecting list of files/directories to include in tarball via ${PWD}.. files_list=${tmpdir}/files.list.txt module_files_list=${tmpdir}/module_files.list.txt +# include Lmod cache and configuration file (lmodrc.lua), +if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then + # skip whiteout files and backup copies of Lmod cache (spiderT.old.*) + find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list} +fi + if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then # include Lmod cache and configuration file (lmodrc.lua), # skip whiteout files and backup copies of Lmod cache (spiderT.old.*) - find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' > ${files_list} + find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' >> ${files_list} fi + +# include scripts that were copied by install_scripts.sh, which we want to ship in EESSI repository +if [ -d ${eessi_version}/scripts ]; then + find ${eessi_version}/scripts -type f | grep -v '/\.wh\.' >> ${files_list} +fi + if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then # module files find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list} @@ -55,6 +67,7 @@ if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then | grep -v '/\.wh\.' | grep -v '/\.modulerc\.lua' | sed -e 's/.lua$//' | sed -e 's@.*/modules/all/@@g' | sort -u \ >> ${module_files_list} fi + if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/software -a -r ${module_files_list} ]; then # installation directories but only those for which module files were created # Note, we assume that module names (as defined by 'PACKAGE_NAME/VERSION.lua' diff --git a/easystacks/software.eessi.io/2023.06/README.md b/easystacks/software.eessi.io/2023.06/README.md new file mode 100644 index 0000000000..733ebf9475 --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/README.md @@ -0,0 +1,7 @@ +File naming matters, since it determines the order in which easystack files are processed. + +Software installed with system toolchain should be installed first, +this includes EasyBuild itself, see `eessi-2023.06-eb-4.8.2-001-system.yml` . + +CUDA installations must be done before CUDA is required as dependency for something +built with a non-system toolchain, see `eessi-2023.06-eb-4.8.2-010-CUDA.yml` . diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-system.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-001-system.yml similarity index 100% rename from easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-system.yml rename to easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-001-system.yml diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-010-CUDA.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-010-CUDA.yml new file mode 100644 index 0000000000..dda274b8db --- /dev/null +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-010-CUDA.yml @@ -0,0 +1,5 @@ +easyconfigs: + - CUDA-12.1.1.eb: + options: + include-easyblocks-from-pr: 3045 + accept-eula-for: CUDA diff --git a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml index 567db44e42..596b9ea21f 100644 --- a/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml +++ b/easystacks/software.eessi.io/2023.06/eessi-2023.06-eb-4.8.2-2023a.yml @@ -35,3 +35,11 @@ easyconfigs: - Boost-1.82.0-GCC-12.3.0.eb - netCDF-4.9.2-gompi-2023a.eb - FFmpeg-6.0-GCCcore-12.3.0.eb + - CUDA-Samples-12.1-GCC-12.3.0-CUDA-12.1.1.eb: + # use easyconfig that only install subset of CUDA samples, + # to circumvent problem with nvcc linking to glibc of host OS, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19189; + # and where additional samples are excluded because they fail to build on aarch64, + # see https://github.com/easybuilders/easybuild-easyconfigs/pull/19451; + options: + from-pr: 19451 diff --git a/eb_hooks.py b/eb_hooks.py index 6fe92c7f7b..27cc873fa1 100644 --- a/eb_hooks.py +++ b/eb_hooks.py @@ -7,7 +7,7 @@ from easybuild.framework.easyconfig.constants import EASYCONFIG_CONSTANTS from easybuild.tools.build_log import EasyBuildError, print_msg from easybuild.tools.config import build_option, update_build_option -from easybuild.tools.filetools import apply_regex_substitutions, copy_file, which +from easybuild.tools.filetools import apply_regex_substitutions, copy_file, remove_file, symlink, which from easybuild.tools.run import run_cmd from easybuild.tools.systemtools import AARCH64, POWER, X86_64, get_cpu_architecture, get_cpu_features from easybuild.tools.toolchain.compiler import OPTARCH_GENERIC @@ -68,6 +68,9 @@ def parse_hook(ec, *args, **kwargs): if ec.name in PARSE_HOOKS: PARSE_HOOKS[ec.name](ec, eprefix) + # inject the GPU property (if required) + ec = inject_gpu_property(ec) + def post_ready_hook(self, *args, **kwargs): """ @@ -393,6 +396,104 @@ def pre_single_extension_isoband(ext, *args, **kwargs): ext.cfg['preinstallopts'] = "sed -i 's/SIGSTKSZ/32768/g' src/testthat/vendor/catch.h && " +def post_sanitycheck_hook(self, *args, **kwargs): + """Main post-sanity-check hook: trigger custom functions based on software name.""" + if self.name in POST_SANITYCHECK_HOOKS: + POST_SANITYCHECK_HOOKS[self.name](self, *args, **kwargs) + + +def post_sanitycheck_cuda(self, *args, **kwargs): + """ + Remove files from CUDA installation that we are not allowed to ship, + and replace them with a symlink to a corresponding installation under host_injections. + """ + if self.name == 'CUDA': + print_msg("Replacing files in CUDA installation that we can not ship with symlinks to host_injections...") + + # read CUDA EULA, construct allowlist based on section 2.6 that specifies list of files that can be shipped + eula_path = os.path.join(self.installdir, 'EULA.txt') + relevant_eula_lines = [] + with open(eula_path) as infile: + copy = False + for line in infile: + if line.strip() == "2.6. Attachment A": + copy = True + continue + elif line.strip() == "2.7. Attachment B": + copy = False + continue + elif copy: + relevant_eula_lines.append(line) + + # create list without file extensions, they're not really needed and they only complicate things + allowlist = ['EULA', 'README'] + file_extensions = ['.so', '.a', '.h', '.bc'] + for line in relevant_eula_lines: + for word in line.split(): + if any(ext in word for ext in file_extensions): + allowlist.append(os.path.splitext(word)[0]) + allowlist = sorted(set(allowlist)) + self.log.info("Allowlist for files in CUDA installation that can be redistributed: " + ', '.join(allowlist)) + + # Do some quick sanity checks for things we should or shouldn't have in the list + if 'nvcc' in allowlist: + raise EasyBuildError("Found 'nvcc' in allowlist: %s" % allowlist) + if 'libcudart' not in allowlist: + raise EasyBuildError("Did not find 'libcudart' in allowlist: %s" % allowlist) + + # iterate over all files in the CUDA installation directory + for dir_path, _, files in os.walk(self.installdir): + for filename in files: + full_path = os.path.join(dir_path, filename) + # we only really care about real files, i.e. not symlinks + if not os.path.islink(full_path): + # check if the current file is part of the allowlist + basename = os.path.splitext(filename)[0] + if basename in allowlist: + self.log.debug("%s is found in allowlist, so keeping it: %s", basename, full_path) + else: + self.log.debug("%s is not found in allowlist, so replacing it with symlink: %s", + basename, full_path) + # if it is not in the allowlist, delete the file and create a symlink to host_injections + host_inj_path = full_path.replace('versions', 'host_injections') + # make sure source and target of symlink are not the same + if full_path == host_inj_path: + raise EasyBuildError("Source (%s) and target (%s) are the same location, are you sure you " + "are using this hook for an EESSI installation?", + full_path, host_inj_path) + remove_file(full_path) + symlink(host_inj_path, full_path) + else: + raise EasyBuildError("CUDA-specific hook triggered for non-CUDA easyconfig?!") + + +def inject_gpu_property(ec): + """ + Add 'gpu' property, via modluafooter easyconfig parameter + """ + ec_dict = ec.asdict() + # Check if CUDA is in the dependencies, if so add the 'gpu' Lmod property + if ('CUDA' in [dep[0] for dep in iter(ec_dict['dependencies'])]): + ec.log.info("Injecting gpu as Lmod arch property and envvar with CUDA version") + key = 'modluafooter' + value = 'add_property("arch","gpu")' + cuda_version = 0 + for dep in iter(ec_dict['dependencies']): + # Make CUDA a build dependency only (rpathing saves us from link errors) + if 'CUDA' in dep[0]: + cuda_version = dep[1] + ec_dict['dependencies'].remove(dep) + if dep not in ec_dict['builddependencies']: + ec_dict['builddependencies'].append(dep) + value = '\n'.join([value, 'setenv("EESSICUDAVERSION","%s")' % cuda_version]) + if key in ec_dict: + if not value in ec_dict[key]: + ec[key] = '\n'.join([ec_dict[key], value]) + else: + ec[key] = value + return ec + + PARSE_HOOKS = { 'CGAL': parse_hook_cgal_toolchainopts_precise, 'fontconfig': parse_hook_fontconfig_add_fonts, @@ -424,3 +525,7 @@ def pre_single_extension_isoband(ext, *args, **kwargs): 'isoband': pre_single_extension_isoband, 'testthat': pre_single_extension_testthat, } + +POST_SANITYCHECK_HOOKS = { + 'CUDA': post_sanitycheck_cuda, +} diff --git a/eessi_container.sh b/eessi_container.sh index 81d7be81ad..d6e9558202 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -118,7 +118,6 @@ STORAGE= LIST_REPOS=0 MODE="shell" SETUP_NVIDIA=0 -ADDITIONAL_SINGULARITY_FLAGS= REPOSITORY="EESSI" RESUME= SAVE= @@ -437,12 +436,14 @@ BIND_PATHS="${BIND_PATHS},${EESSI_TMPDIR}:${TMP_IN_CONTAINER}" [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}" +declare -a ADDITIONAL_CONTAINER_OPTIONS=() + # Configure anything we need for NVIDIA GPUs and CUDA installation if [[ ${SETUP_NVIDIA} -eq 1 ]]; then if [[ "${NVIDIA_MODE}" == "run" || "${NVIDIA_MODE}" == "all" ]]; then # Give singularity the appropriate flag - ADDITIONAL_SINGULARITY_FLAGS="--nv ${ADDITIONAL_SINGULARITY_FLAGS}" - [[ ${VERBOSE} -eq 1 ]] && echo "ADDITIONAL_SINGULARITY_FLAGS=${ADDITIONAL_SINGULARITY_FLAGS}" + ADDITIONAL_CONTAINER_OPTIONS+=("--nv") + [[ ${VERBOSE} -eq 1 ]] && echo "ADDITIONAL_CONTAINER_OPTIONS=${ADDITIONAL_CONTAINER_OPTIONS[@]}" fi if [[ "${NVIDIA_MODE}" == "install" || "${NVIDIA_MODE}" == "all" ]]; then # Add additional bind mounts to allow CUDA to install within a container @@ -621,8 +622,8 @@ if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then fi echo "Launching container with command (next line):" -echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_SINGULARITY_FLAGS} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" -singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_SINGULARITY_FLAGS} "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" +echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_CONTAINER_OPTIONS[@]} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" +singularity ${RUN_QUIET} ${MODE} "${ADDITIONAL_CONTAINER_OPTIONS[@]}" "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" exit_code=$? # 6. save tmp if requested (arg -s|--save) diff --git a/install_scripts.sh b/install_scripts.sh index 209d953c88..6e6cd825ac 100755 --- a/install_scripts.sh +++ b/install_scripts.sh @@ -8,12 +8,29 @@ display_help() { echo " -h | --help - display this usage information" } +compare_and_copy() { + if [ "$#" -ne 2 ]; then + echo "Usage of function: compare_and_copy " + return 1 + fi + + source_file="$1" + destination_file="$2" + + if [ ! -f "$destination_file" ] || ! diff -q "$source_file" "$destination_file" ; then + cp "$source_file" "$destination_file" + echo "File $1 copied to $2" + else + echo "Files $1 and $2 are identical. No copy needed." + fi +} + POSITIONAL_ARGS=() while [[ $# -gt 0 ]]; do case $1 in - -o|--prefix) + -p|--prefix) INSTALL_PREFIX="$2" shift 2 ;; @@ -38,25 +55,27 @@ set -- "${POSITIONAL_ARGS[@]}" TOPDIR=$(dirname $(realpath $0)) # Subdirs for generic scripts -SCRIPTS_DIR_SOURCE=${TOPDIR}/scripts/ # Source dir -SCRIPTS_DIR_TARGET=${INSTALL_PREFIX}/scripts/ # Target dir +SCRIPTS_DIR_SOURCE=${TOPDIR}/scripts # Source dir +SCRIPTS_DIR_TARGET=${INSTALL_PREFIX}/scripts # Target dir # Create target dir mkdir -p ${SCRIPTS_DIR_TARGET} # Copy scripts into this prefix +echo "copying scripts from ${SCRIPTS_DIR_SOURCE} to ${SCRIPTS_DIR_TARGET}" for file in utils.sh; do - cp ${SCRIPTS_DIR_SOURCE}/${file} ${SCRIPTS_DIR_TARGET}/${file} + compare_and_copy ${SCRIPTS_DIR_SOURCE}/${file} ${SCRIPTS_DIR_TARGET}/${file} done # Subdirs for GPU support -NVIDIA_GPU_SUPPORT_DIR_SOURCE=${TOPDIR}/gpu_support/nvidia/ # Source dir -NVIDIA_GPU_SUPPORT_DIR_TARGET=${INSTALL_PREFIX}/gpu_support/nvidia/ # Target dir +NVIDIA_GPU_SUPPORT_DIR_SOURCE=${SCRIPTS_DIR_SOURCE}/gpu_support/nvidia # Source dir +NVIDIA_GPU_SUPPORT_DIR_TARGET=${SCRIPTS_DIR_TARGET}/gpu_support/nvidia # Target dir # Create target dir mkdir -p ${NVIDIA_GPU_SUPPORT_DIR_TARGET} # Copy files from this directory into the prefix # To be on the safe side, we dont do recursive copies, but we are explicitely copying each individual file we want to add -for file in install_cuda_host_injections.sh link_nvidia_host_injections.sh; do - cp ${NVIDIA_GPU_SUPPORT_DIR_SOURCE}/${file} ${NVIDIA_GPU_SUPPORT_DIR_TARGET}/${file} +echo "copying scripts from ${NVIDIA_GPU_SUPPORT_DIR_SOURCE} to ${NVIDIA_GPU_SUPPORT_DIR_TARGET}" +for file in install_cuda_host_injections.sh link_nvidia_host_libraries.sh; do + compare_and_copy ${NVIDIA_GPU_SUPPORT_DIR_SOURCE}/${file} ${NVIDIA_GPU_SUPPORT_DIR_TARGET}/${file} done diff --git a/gpu_support/nvidia/install_cuda_host_injections.sh b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh similarity index 97% rename from gpu_support/nvidia/install_cuda_host_injections.sh rename to scripts/gpu_support/nvidia/install_cuda_host_injections.sh index f02f0da02e..a9310d817a 100755 --- a/gpu_support/nvidia/install_cuda_host_injections.sh +++ b/scripts/gpu_support/nvidia/install_cuda_host_injections.sh @@ -14,7 +14,7 @@ # Initialise our bash functions TOPDIR=$(dirname $(realpath $BASH_SOURCE)) -source "$TOPDIR"/../../scripts/utils.sh +source "$TOPDIR"/../../utils.sh # Function to display help message show_help() { @@ -200,7 +200,9 @@ else eb --prefix="$tmpdir" ${extra_args} --accept-eula-for=CUDA --hooks="$tmpdir"/none.py --installpath="${cuda_install_parent}"/ "${cuda_easyconfig}" ret=$? if [ $ret -ne 0 ]; then - fatal_error "CUDA installation failed, please check EasyBuild logs..." + eb_last_log=$(unset EB_VERBOSE; eb --last-log) + cp -a ${eb_last_log} . + fatal_error "CUDA installation failed, please check EasyBuild logs $(basename ${eb_last_log})..." else echo_green "CUDA installation at ${cuda_install_parent}/software/CUDA/${install_cuda_version} succeeded!" fi diff --git a/gpu_support/nvidia/link_nvidia_host_libraries.sh b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh similarity index 97% rename from gpu_support/nvidia/link_nvidia_host_libraries.sh rename to scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh index 6458be7fae..e6ff110797 100755 --- a/gpu_support/nvidia/link_nvidia_host_libraries.sh +++ b/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh @@ -5,7 +5,7 @@ # Initialise our bash functions TOPDIR=$(dirname $(realpath $BASH_SOURCE)) -source "$TOPDIR"/../../scripts/utils.sh +source "$TOPDIR"/../../utils.sh # We rely on ldconfig to give us the location of the libraries on the host command_name="ldconfig" @@ -36,7 +36,7 @@ if [ ${#found_paths[@]} -gt 0 ]; then host_ldconfig=${found_paths[0]} else error="$command_name not found in PATH or only found in paths starting with $exclude_prefix." - fatal_error $error + fatal_error "$error" fi # Make sure EESSI is initialised (doesn't matter what version) @@ -52,7 +52,7 @@ if $nvidia_smi_command > /dev/null; then host_cuda_version=$(nvidia-smi -q --display=COMPUTE | grep CUDA | awk 'NF>1{print $NF}') else error="Failed to successfully execute\n $nvidia_smi_command\n" - fatal_error $error + fatal_error "$error" fi # Let's make sure the driver libraries are not already in place @@ -71,7 +71,7 @@ if [ -e "$host_injection_driver_version_file" ]; then rm $host_injection_driver_dir/* if [ $? -ne 0 ]; then error="Unable to remove files under '$host_injection_driver_dir'." - fatal_error $error + fatal_error "$error" fi fi fi