Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Final steps in Syncing with EESSI #232

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 29 additions & 11 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -187,31 +187,46 @@ fi
# assume there's only one diff file that corresponds to the PR patch file
pr_diff=$(ls [0-9]*.diff | head -1)

# install any additional required scripts
# order is important: these are needed to install a full CUDA SDK in host_injections
# for now, this just reinstalls all scripts. Note the most elegant, but works
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}

# Install full CUDA SDK in host_injections
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula

# Install drivers in host_injections
# TODO: this is commented out for now, because the script assumes that nvidia-smi is available and works;
# if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software)
# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh

# use PR patch file to determine in which easystack files stuff was added
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing')
if [ -z ${changed_easystacks} ]; then
echo "No missing installations, party time!" # Ensure the bot report success, as there was nothing to be build here
else
for easystack_file in ${changed_easystacks}; do

echo -e "Processing easystack file ${easystack_file}...\n\n"

# determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g')

# load EasyBuild module (will be installed if it's not available yet)
source ${TOPDIR}/load_easybuild_module.sh ${eb_version}

${EB} --show-config

echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..."

if [ -f ${easystack_file} ]; then
echo_green "Feeding easystack file ${easystack_file} to EasyBuild..."

${EB} --easystack ${TOPDIR}/${easystack_file} --robot
ec=$?

# copy EasyBuild log file if EasyBuild exited with an error
if [ ${ec} -ne 0 ]; then
eb_last_log=$(unset EB_VERBOSE; eb --last-log)
Expand All @@ -221,18 +236,21 @@ else
# copy to build logs dir (with context added)
copy_build_log "${eb_last_log}" "${build_logs_dir}"
fi

$TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file}
else
fatal_error "Easystack file ${easystack_file} not found!"
fi

done
fi

### add packages here

echo ">> Creating/updating Lmod cache..."
export LMOD_RC="${EASYBUILD_INSTALLPATH}/.lmod/lmodrc.lua"
if [ ! -f $LMOD_RC ]; then
lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?)
if [ ! -f $LMOD_RC ] || [ ${lmodrc_changed} == '0' ]; then
python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH}
check_exit_code $? "$LMOD_RC created" "Failed to create $LMOD_RC"
fi
Expand Down
80 changes: 80 additions & 0 deletions create_lmodrc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,85 @@
}
"""

GPU_LMOD_RC ="""require("strict")
local hook = require("Hook")
local open = io.open

local function read_file(path)
local file = open(path, "rb") -- r read mode and b binary mode
if not file then return nil end
local content = file:read "*a" -- *a or *all reads the whole file
file:close()
return content
end

local function cuda_enabled_load_hook(t)
local frameStk = require("FrameStk"):singleton()
local mt = frameStk:mt()
local simpleName = string.match(t.modFullName, "(.-)/")
-- If we try to load CUDA itself, check if the full CUDA SDK was installed on the host in host_injections.
-- This is required for end users to build additional CUDA software. If the full SDK isn't present, refuse
-- to load the CUDA module and print an informative message on how to set up GPU support for EESSI
local refer_to_docs = "For more information on how to do this, see https://www.eessi.io/docs/gpu/.\\n"
if simpleName == 'CUDA' then
-- get the full host_injections path
local hostInjections = string.gsub(os.getenv('EESSI_SOFTWARE_PATH') or "", 'versions', 'host_injections')
-- build final path where the CUDA software should be installed
local cudaEasyBuildDir = hostInjections .. "/software/" .. t.modFullName .. "/easybuild"
local cudaDirExists = isDir(cudaEasyBuildDir)
if not cudaDirExists then
local advice = "but while the module file exists, the actual software is not entirely shipped with EESSI "
advice = advice .. "due to licencing. You will need to install a full copy of the CUDA SDK where EESSI "
advice = advice .. "can find it.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYou requested to load ", simpleName, " ", advice)
end
end
-- when loading CUDA enabled modules check if the necessary driver libraries are accessible to the EESSI linker,
-- otherwise, refuse to load the requested module and print error message
local haveGpu = mt:haveProperty(simpleName,"arch","gpu")
if haveGpu then
local arch = os.getenv("EESSI_CPU_FAMILY") or ""
local cudaVersionFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/cuda_version.txt"
local cudaDriverFile = "/cvmfs/software.eessi.io/host_injections/nvidia/" .. arch .. "/latest/libcuda.so"
local cudaDriverExists = isFile(cudaDriverFile)
local singularityCudaExists = isFile("/.singularity.d/libs/libcuda.so")
if not (cudaDriverExists or singularityCudaExists) then
local advice = "which relies on the CUDA runtime environment and driver libraries. "
advice = advice .. "In order to be able to use the module, you will need "
advice = advice .. "to make sure EESSI can find the GPU driver libraries on your host system.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYou requested to load ", simpleName, " ", advice)
else
-- CUDA driver exists, now we check its version to see if an update is needed
if cudaDriverExists then
local cudaVersion = read_file(cudaVersionFile)
local cudaVersion_req = os.getenv("EESSICUDAVERSION")
-- driver CUDA versions don't give a patch version for CUDA
local major, minor = string.match(cudaVersion, "(%d+)%.(%d+)")
local major_req, minor_req, patch_req = string.match(cudaVersion_req, "(%d+)%.(%d+)%.(%d+)")
local driver_libs_need_update = false
if major < major_req then
driver_libs_need_update = true
elseif major == major_req then
if minor < minor_req then
driver_libs_need_update = true
end
end
if driver_libs_need_update == true then
local advice = "but the module you want to load requires CUDA " .. cudaVersion_req .. ". "
advice = advice .. "Please update your CUDA driver libraries and then "
advice = advice .. "let EESSI know about the update.\\n"
advice = advice .. refer_to_docs
LmodError("\\nYour driver CUDA version is ", cudaVersion, " ", advice)
end
end
end
end
end

hook.register("load", cuda_enabled_load_hook)
"""

def error(msg):
sys.stderr.write("ERROR: %s\n" % msg)
Expand All @@ -36,6 +115,7 @@ def error(msg):
'dot_lmod': DOT_LMOD,
'prefix': prefix,
}
lmodrc_txt += '\n' + GPU_LMOD_RC
try:
os.makedirs(os.path.dirname(lmodrc_path), exist_ok=True)
with open(lmodrc_path, 'w') as fp:
Expand Down
Loading
Loading