From f023863d3d2e501732011cf1750140689961e63d Mon Sep 17 00:00:00 2001 From: Richard Top Date: Wed, 17 Jan 2024 17:08:29 +0000 Subject: [PATCH] Further changes to get closer to EESSI --- configure_easybuild | 2 +- create_directory_tarballs.sh | 4 +- create_tarball.sh | 7 ++++ eessi_container.sh | 72 ++++++++++++++++++++++++++++++++++-- 4 files changed, 78 insertions(+), 7 deletions(-) diff --git a/configure_easybuild b/configure_easybuild index 7dca1ce682..c1bd1d390b 100644 --- a/configure_easybuild +++ b/configure_easybuild @@ -26,7 +26,7 @@ fi # note: filtering Bison may break some installations, like Qt5 (see https://github.com/EESSI/software-layer/issues/49) # filtering pkg-config breaks R-bundle-Bioconductor installation (see also https://github.com/easybuilders/easybuild-easyconfigs/pull/11104) # problems occur when filtering pkg-config with gnuplot too (picks up Lua 5.1 from $EPREFIX rather than from Lua 5.3 dependency) -DEPS_TO_FILTER=Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,Lua,M4,makeinfo,ncurses,util-linux,XZ,zlib +DEPS_TO_FILTER=Autoconf,Automake,Autotools,binutils,bzip2,DBus,flex,gettext,gperf,help2man,intltool,libreadline,libtool,M4,makeinfo,ncurses,util-linux,XZ,zlib # For aarch64 we need to also filter out Yasm. # See https://github.com/easybuilders/easybuild-easyconfigs/issues/11190 if [[ "$EESSI_CPU_FAMILY" == "aarch64" ]]; then diff --git a/create_directory_tarballs.sh b/create_directory_tarballs.sh index dd644ec800..a1f05b5464 100755 --- a/create_directory_tarballs.sh +++ b/create_directory_tarballs.sh @@ -1,7 +1,5 @@ #!/bin/bash -SOFTWARE_LAYER_TARBALL_URL=https://github.com/NorESSI/software-layer/tarball/nessi.no-2023.06 - set -eo pipefail if [ $# -ne 1 ]; then @@ -11,6 +9,8 @@ fi version=$1 +SOFTWARE_LAYER_TARBALL_URL=https://github.com/NorESSI/software-layer/tarball/nessi.no-2023.06 + TOPDIR=$(dirname $(realpath $0)) source $TOPDIR/scripts/utils.sh diff --git a/create_tarball.sh b/create_tarball.sh index 8510caebf1..f6239cf28e 100755 --- a/create_tarball.sh +++ b/create_tarball.sh @@ -45,6 +45,12 @@ if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod ]; then # skip whiteout files and backup copies of Lmod cache (spiderT.old.*) find ${eessi_version}/software/${os}/${cpu_arch_subdir}/.lmod -type f | egrep -v '/\.wh\.|spiderT.old' > ${files_list} fi + +# include scripts that were copied by install_scripts.sh, which we want to ship in EESSI repository +if [ -d ${eessi_version}/scripts ]; then + find ${eessi_version}/scripts -type f | grep -v '/\.wh\.' >> ${files_list} +fi + if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then # module files find ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules -type f | grep -v '/\.wh\.' >> ${files_list} @@ -55,6 +61,7 @@ if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/modules ]; then | grep -v '/\.wh\.' | grep -v '/\.modulerc\.lua' | sed -e 's/.lua$//' | sed -e 's@.*/modules/all/@@g' | sort -u \ >> ${module_files_list} fi + if [ -d ${eessi_version}/software/${os}/${cpu_arch_subdir}/software -a -r ${module_files_list} ]; then # installation directories but only those for which module files were created # Note, we assume that module names (as defined by 'PACKAGE_NAME/VERSION.lua' diff --git a/eessi_container.sh b/eessi_container.sh index 5e870c3b1b..bfc5963426 100755 --- a/eessi_container.sh +++ b/eessi_container.sh @@ -46,6 +46,7 @@ SAVE_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 8)) HTTP_PROXY_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 9)) HTTPS_PROXY_ERROR_EXITCODE=$((${ANY_ERROR_EXITCODE} << 10)) RUN_SCRIPT_MISSING_EXITCODE=$((${ANY_ERROR_EXITCODE} << 11)) +NVIDIA_MODE_UNKNOWN_EXITCODE=$((${ANY_ERROR_EXITCODE} << 12)) # CernVM-FS settings CVMFS_VAR_LIB="var-lib-cvmfs" @@ -72,12 +73,17 @@ display_help() { echo " -a | --access {ro,rw} - ro (read-only), rw (read & write) [default: ro]" echo " -c | --container IMG - image file or URL defining the container to use" echo " [default: docker://ghcr.io/eessi/build-node:debian11]" - echo " -h | --help - display this usage information [default: false]" echo " -g | --storage DIR - directory space on host machine (used for" echo " temporary data) [default: 1. TMPDIR, 2. /tmp]" + echo " -h | --help - display this usage information [default: false]" + echo " -i | --host-injections - directory to link to for host_injections " + echo " [default: /..storage../opt-eessi]" echo " -l | --list-repos - list available repository identifiers [default: false]" echo " -m | --mode MODE - with MODE==shell (launch interactive shell) or" echo " MODE==run (run a script or command) [default: shell]" + echo " -n | --nvidia MODE - configure the container to work with NVIDIA GPUs," + echo " MODE==install for a CUDA installation, MODE==run to" + echo " attach a GPU, MODE==all for both [default: false]" echo " -r | --repository CFG - configuration file or identifier defining the" echo " repository to use [default: EESSI via" echo " default container, see --container]" @@ -111,6 +117,7 @@ VERBOSE=0 STORAGE= LIST_REPOS=0 MODE="shell" +SETUP_NVIDIA=0 REPOSITORY="EESSI" RESUME= SAVE= @@ -141,6 +148,10 @@ while [[ $# -gt 0 ]]; do display_help exit 0 ;; + -i|--host-injections) + USER_HOST_INJECTIONS="$2" + shift 2 + ;; -l|--list-repos) LIST_REPOS=1 shift 1 @@ -149,6 +160,11 @@ while [[ $# -gt 0 ]]; do MODE="$2" shift 2 ;; + -n|--nvidia) + SETUP_NVIDIA=1 + NVIDIA_MODE="$2" + shift 2 + ;; -r|--repository) REPOSITORY="$2" shift 2 @@ -224,6 +240,13 @@ if [[ "${MODE}" != "shell" && "${MODE}" != "run" ]]; then fatal_error "unknown execution mode '${MODE}'" "${MODE_UNKNOWN_EXITCODE}" fi +# Also validate the NVIDIA GPU mode (if present) +if [[ ${SETUP_NVIDIA} -eq 1 ]]; then + if [[ "${NVIDIA_MODE}" != "run" && "${NVIDIA_MODE}" != "install" && "${NVIDIA_MODE}" != "all" ]]; then + fatal_error "unknown NVIDIA mode '${NVIDIA_MODE}'" "${NVIDIA_MODE_UNKNOWN_EXITCODE}" + fi +fi + # TODO (arg -r|--repository) check if repository is known # REPOSITORY_ERROR_EXITCODE if [[ ! -z "${REPOSITORY}" && "${REPOSITORY}" != "EESSI" && ! -r ${EESSI_REPOS_CFG_FILE} ]]; then @@ -310,12 +333,25 @@ fi # |-overlay-work # |-home # |-repos_cfg +# |-opt-eessi (unless otherwise specificed for host_injections) # tmp dir for EESSI EESSI_TMPDIR=${EESSI_HOST_STORAGE} mkdir -p ${EESSI_TMPDIR} [[ ${VERBOSE} -eq 1 ]] && echo "EESSI_TMPDIR=${EESSI_TMPDIR}" +# Set host_injections directory and ensure it is a writable directory (if user provided) +if [ -z ${USER_HOST_INJECTIONS+x} ]; then + # Not set, so use our default + HOST_INJECTIONS=${EESSI_TMPDIR}/opt-eessi + mkdir -p $HOST_INJECTIONS +else + # Make sure the host_injections directory specified exists and is a folder + mkdir -p ${USER_HOST_INJECTIONS} || fatal_error "host_injections directory ${USER_HOST_INJECTIONS} is either not a directory or cannot be created" + HOST_INJECTIONS=${USER_HOST_INJECTIONS} +fi +[[ ${VERBOSE} -eq 1 ]] && echo "HOST_INJECTIONS=${HOST_INJECTIONS}" + # configure Singularity: if SINGULARITY_CACHEDIR is already defined, use that # a global SINGULARITY_CACHEDIR would ensure that we don't consume # storage space again and again for the container & also speed-up @@ -394,12 +430,36 @@ fi [[ ${VERBOSE} -eq 1 ]] && echo "SINGULARITY_HOME=${SINGULARITY_HOME}" # define paths to add to SINGULARITY_BIND (added later when all BIND mounts are defined) -BIND_PATHS="${EESSI_CVMFS_VAR_LIB}:/var/lib/cvmfs,${EESSI_CVMFS_VAR_RUN}:/var/run/cvmfs" +BIND_PATHS="${EESSI_CVMFS_VAR_LIB}:/var/lib/cvmfs,${EESSI_CVMFS_VAR_RUN}:/var/run/cvmfs,${HOST_INJECTIONS}:/opt/eessi" # provide a '/tmp' inside the container BIND_PATHS="${BIND_PATHS},${EESSI_TMPDIR}:${TMP_IN_CONTAINER}" [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}" +declare -a ADDITIONAL_CONTAINER_OPTIONS=() + +# Configure anything we need for NVIDIA GPUs and CUDA installation +if [[ ${SETUP_NVIDIA} -eq 1 ]]; then + if [[ "${NVIDIA_MODE}" == "run" || "${NVIDIA_MODE}" == "all" ]]; then + # Give singularity the appropriate flag + ADDITIONAL_CONTAINER_OPTIONS+=("--nv") + [[ ${VERBOSE} -eq 1 ]] && echo "ADDITIONAL_CONTAINER_OPTIONS=${ADDITIONAL_CONTAINER_OPTIONS[@]}" + fi + if [[ "${NVIDIA_MODE}" == "install" || "${NVIDIA_MODE}" == "all" ]]; then + # Add additional bind mounts to allow CUDA to install within a container + # (Experience tells us that these are necessary, but we don't know _why_ + # as the CUDA installer is a black box. The suspicion is that the CUDA + # installer gets confused by the permissions on these directories when + # inside a container) + EESSI_VAR_LOG=${EESSI_TMPDIR}/var-log + EESSI_USR_LOCAL_CUDA=${EESSI_TMPDIR}/usr-local-cuda + mkdir -p ${EESSI_VAR_LOG} + mkdir -p ${EESSI_USR_LOCAL_CUDA} + BIND_PATHS="${BIND_PATHS},${EESSI_VAR_LOG}:/var/log,${EESSI_USR_LOCAL_CUDA}:/usr/local/cuda" + [[ ${VERBOSE} -eq 1 ]] && echo "BIND_PATHS=${BIND_PATHS}" + fi +fi + # set up repository config (always create directory repos_cfg and populate it with info when # arg -r|--repository is used) mkdir -p ${EESSI_TMPDIR}/repos_cfg @@ -513,6 +573,10 @@ fi # 4. set up vars and dirs specific to a scenario declare -a EESSI_FUSE_MOUNTS=() + +# always mount cvmfs-config repo (to get access to software.eessi.io) +# EESSI_FUSE_MOUNTS+=("--fusemount" "container:cvmfs2 cvmfs-config.cern.ch /cvmfs/cvmfs-config.cern.ch") + if [[ "${ACCESS}" == "ro" ]]; then export EESSI_READONLY="container:cvmfs2 ${repo_name} /cvmfs/${repo_name}" @@ -558,8 +622,8 @@ if [ ! -z ${EESSI_SOFTWARE_SUBDIR_OVERRIDE} ]; then fi echo "Launching container with command (next line):" -echo "singularity ${RUN_QUIET} ${MODE} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" -singularity ${RUN_QUIET} ${MODE} "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" +echo "singularity ${RUN_QUIET} ${MODE} ${ADDITIONAL_CONTAINER_OPTIONS[@]} ${EESSI_FUSE_MOUNTS[@]} ${CONTAINER} $@" +singularity ${RUN_QUIET} ${MODE} "${ADDITIONAL_CONTAINER_OPTIONS[@]}" "${EESSI_FUSE_MOUNTS[@]}" ${CONTAINER} "$@" exit_code=$? # 6. save tmp if requested (arg -s|--save)