Skip to content

Commit

Permalink
Merge branch 'nessi.no-2023.06' of github-trz:NorESSI/software-layer …
Browse files Browse the repository at this point in the history
…into sync_nessi_eessi_test_pr
  • Loading branch information
truib committed Jan 22, 2024
2 parents b61d423 + 04044d8 commit 2bd8426
Show file tree
Hide file tree
Showing 50 changed files with 1,759 additions and 247 deletions.
Original file line number Diff line number Diff line change
@@ -1,80 +1,66 @@
# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions
name: Tests relying on having EESSI pilot repo mounted
name: Check for missing software installations in pilot.nessi.no
on: [push, pull_request, workflow_dispatch]
permissions:
contents: read # to fetch code (actions/checkout)
jobs:
pilot:
runs-on: ubuntu-20.04
check_missing:
runs-on: ubuntu-22.04
strategy:
fail-fast: false
matrix:
EESSI_VERSION:
- 2023.06
EESSI_SOFTWARE_SUBDIR:
EESSI_SOFTWARE_SUBDIR_OVERRIDE:
# - aarch64/generic
- x86_64/amd/zen2
- x86_64/intel/broadwell
# - x86_64/intel/cascadelake
- x86_64/intel/skylake_avx512
- x86_64/generic
EASYSTACK_FILE:
- eessi-2023.06-eb-4.7.2-2021a.yml
- eessi-2023.06-eb-4.7.2-2021b.yml
- eessi-2023.06-eb-4.7.2-2022a.yml
- eessi-2023.06-eb-4.7.2-2022b.yml
- eessi-2023.06-eb-4.7.2-system.yml
- eessi-2023.06-eb-4.8.0-system.yml
- eessi-2023.06-eb-4.8.1-2022a.yml
- eessi-2023.06-eb-4.8.1-system.yml
- eessi-2023.06-eb-4.8.2-2022a.yml
steps:
- name: Check out software-layer repository
uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0

- name: Mount EESSI CernVM-FS pilot repository
- name: Mount NESSI CernVM-FS repository
uses: cvmfs-contrib/github-action-cvmfs@d4641d0d591c9a5c3be23835ced2fb648b44c04b # v3.1
with:
cvmfs_config_package: https://github.com/NorESSI/filesystem-layer/releases/download/latest/cvmfs-config-nessi_latest_all.deb
cvmfs_http_proxy: DIRECT
cvmfs_repositories: pilot.nessi.no

- name: Test check_missing_installations.sh script with EESSI_SOFTWARE_SUBDIR_OVERRIDE
if: '!cancelled()'
run: |
export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR}}
source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash
module load EasyBuild
eb --version
export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}
export EESSI_OS_TYPE=linux
env | grep ^EESSI | sort
echo "just run check_missing_installations.sh (should use ${{matrix.EASYSTACK_FILE}})"
./check_missing_installations.sh ${{matrix.EASYSTACK_FILE}}
- name: Test check_missing_installations.sh script without EESSI_SOFTWARE_SUBDIR_OVERRIDE
if: '!cancelled()'
- name: Test check_missing_installations.sh script
run: |
export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}
source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash
# set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash),
# to prevent issues with checks in the Easybuild configuration that use this variable
export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*}
module load EasyBuild
which eb
eb --version
export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}
export EESSI_OS_TYPE=linux
export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}}
env | grep ^EESSI | sort
echo "just run check_missing_installations.sh (should use ${{matrix.EASYSTACK_FILE}})"
./check_missing_installations.sh ${{matrix.EASYSTACK_FILE}}
echo "just run check_missing_installations.sh (should use easystacks/pilot.nessi.no/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-*.yml)"
for easystack_file in $(ls easystacks/pilot.nessi.no/${{matrix.EESSI_VERSION}}/eessi-${{matrix.EESSI_VERSION}}-eb-*.yml); do
echo "check missing installations for ${easystack_file}..."
./check_missing_installations.sh ${easystack_file}
ec=$?
if [[ ${ec} -ne 0 ]]; then echo "missing installations found for ${easystack_file}!" >&2; exit ${ec}; fi
done
- name: Test check_missing_installations.sh with missing package (GCC/8.3.0)
if: '!cancelled()'
run: |
export EESSI_SOFTWARE_SUBDIR_OVERRIDE=${{matrix.EESSI_SOFTWARE_SUBDIR_OVERRIDE}}
source /cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}/init/bash
# set $EESSI_CPU_FAMILY to the CPU architecture that corresponds to $EESSI_SOFTWARE_SUBDIR_OVERRIDE (part before the first slash),
# to prevent issues with checks in the Easybuild configuration that use this variable
export EESSI_CPU_FAMILY=${EESSI_SOFTWARE_SUBDIR_OVERRIDE%%/*}
module load EasyBuild
which eb
eb --version
export EESSI_PREFIX=/cvmfs/pilot.nessi.no/versions/${{matrix.EESSI_VERSION}}
export EESSI_OS_TYPE=linux
export EESSI_SOFTWARE_SUBDIR=${{matrix.EESSI_SOFTWARE_SUBDIR}}
env | grep ^EESSI | sort
# create dummy easystack file with a single entry (something that is not installed in EESSI)
easystack_file="test.yml"
Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/test_eessi_container_script.yml
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ jobs:
elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_default' ]]; then
outfile=out_listrepos.txt
./eessi_container.sh --verbose --list-repos | tee ${outfile}
grep "EESSI-pilot" ${outfile}
grep "EESSI" ${outfile}
# test use of --list-repos with custom repos.cfg
elif [[ ${{matrix.SCRIPT_TEST}} == 'listrepos_custom' ]]; then
Expand All @@ -57,7 +57,7 @@ jobs:
echo "[EESSI/20HT.TP]" >> cfg/repos.cfg
echo "repo_version = 20HT.TP" >> cfg/repos.cfg
./eessi_container.sh --verbose --list-repos | tee ${outfile}
grep "EESSI-pilot" ${outfile}
grep "EESSI" ${outfile}
export EESSI_REPOS_CFG_DIR_OVERRIDE=${PWD}/cfg
./eessi_container.sh --verbose --list-repos | tee ${outfile2}
Expand Down Expand Up @@ -90,15 +90,15 @@ jobs:
elif [[ ${{matrix.SCRIPT_TEST}} == 'readwrite' ]]; then
outfile=out_readwrite.txt
fn="test_${RANDOM}.txt"
echo "touch /cvmfs/pilot.eessi-hpc.org/${fn}" > test_script.sh
echo "touch /cvmfs/pilot.nessi.no/${fn}" > test_script.sh
chmod u+x test_script.sh
export SINGULARITY_BIND="$PWD:/test"
./eessi_container.sh --verbose --access rw --mode run /test/test_script.sh > ${outfile}
tmpdir=$(grep "\-\-resume" ${outfile} | sed "s/.*--resume \([^']*\).*/\1/g")
# note: must use '--access rw' again here, since touched file is in overlay upper dir
./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile}
grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile
./eessi_container.sh --verbose --resume ${tmpdir} --access rw --mode shell <<< "ls -l /cvmfs/pilot.nessi.no/${fn}" > ${outfile}
grep "/cvmfs/pilot.nessi.no/${fn}$" $outfile
# test use of --resume
elif [[ ${{matrix.SCRIPT_TEST}} == 'resume' ]]; then
Expand All @@ -120,12 +120,12 @@ jobs:
elif [[ ${{matrix.SCRIPT_TEST}} == 'save' ]]; then
outfile=out_save.txt
fn="test_${RANDOM}.txt"
test_cmd="touch /cvmfs/pilot.eessi-hpc.org/${fn}"
test_cmd="touch /cvmfs/pilot.nessi.no/${fn}"
./eessi_container.sh --verbose --mode shell --access rw --save test-save.tar <<< "${test_cmd}" 2>&1 | tee ${outfile}
rm -f ${outfile}
./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/pilot.eessi-hpc.org/${fn}" > ${outfile}
grep "/cvmfs/pilot.eessi-hpc.org/${fn}$" $outfile
./eessi_container.sh --verbose --mode shell --access rw --resume test-save.tar <<< "ls -l /cvmfs/pilot.nessi.no/${fn}" > ${outfile}
grep "/cvmfs/pilot.nessi.no/${fn}$" $outfile
tar tfv test-save.tar | grep "overlay-upper/${fn}"
Expand Down
20 changes: 20 additions & 0 deletions .github/workflows/test_licenses.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# documentation: https://help.github.com/en/articles/workflow-syntax-for-github-actions
#
# Runs licenses/spdx.py on licenses/licenses.json for every push and pull request.
name: Test software licenses
on: [push, pull_request]
permissions:
  contents: read # to fetch code (actions/checkout)
jobs:
  build:
    # the ubuntu-20.04 GitHub-hosted runner image has been retired, so jobs
    # requesting it are no longer picked up; use ubuntu-22.04 instead
    runs-on: ubuntu-22.04
    steps:
      # actions are pinned to a full commit SHA; the trailing comment records the release tag
      - name: Check out software-layer repository
        uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0

      - name: set up Python
        uses: actions/setup-python@13ae5bb136fac2878aff31522b9efb785519f984 # v4.3.0
        with:
          # quoted so that YAML keeps the version as a string rather than a float
          python-version: '3.9'

      - name: Check software licenses
        run: |
          python licenses/spdx.py licenses/licenses.json
45 changes: 40 additions & 5 deletions .github/workflows/tests_archdetect.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,59 @@ jobs:
- x86_64/intel/skylake_avx512/archspec-linux-6132
- x86_64/amd/zen2/Azure-CentOS7-7V12
- x86_64/amd/zen3/Azure-CentOS7-7V73X
- ppc64le/power9le/unknown-power9le
- aarch64/arm/neoverse-n1/Azure-Ubuntu20-Altra
- aarch64/arm/neoverse-n1/AWS-awslinux-graviton2
- aarch64/arm/neoverse-v1/AWS-awslinux-graviton3
# commented out since these targets are currently not supported in pilot.nessi.no repo
# (and some tests assume that the corresponding subdirectory in software layer is there)
# - ppc64le/power9le/unknown-power9le
# - aarch64/neoverse-n1/Azure-Ubuntu20-Altra
# - aarch64/neoverse-n1/AWS-awslinux-graviton2
# - aarch64/neoverse-v1/AWS-awslinux-graviton3
fail-fast: false
steps:
- name: checkout
uses: actions/checkout@93ea575cb5d8a053eaa0ac8fa3b40d7e05a33cc8 # v3.1.0

- name: Mount NESSI CernVM-FS repository
uses: cvmfs-contrib/github-action-cvmfs@d4641d0d591c9a5c3be23835ced2fb648b44c04b # v3.1
with:
cvmfs_config_package: https://github.com/NorESSI/filesystem-layer/releases/download/latest/cvmfs-config-nessi_latest_all.deb
cvmfs_http_proxy: DIRECT
cvmfs_repositories: pilot.nessi.no

- name: test eessi_archdetect.sh
run: |
export EESSI_MACHINE_TYPE=${{matrix.proc_cpuinfo}}
export EESSI_MACHINE_TYPE=${EESSI_MACHINE_TYPE%%/*}
export EESSI_PROC_CPUINFO=./tests/archdetect/${{matrix.proc_cpuinfo}}.cpuinfo
# check that printing of best match works correctly
CPU_ARCH=$(./init/eessi_archdetect.sh cpupath)
if [[ $CPU_ARCH == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.output )" ]]; then
echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH" >&2
echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCH"
else
echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCH" >&2
exit 1
fi
# check that $EESSI_SOFTWARE_SUBDIR_OVERRIDE is honored
export EESSI_SOFTWARE_SUBDIR_OVERRIDE='dummy/cpu'
CPU_ARCH=$(./init/eessi_archdetect.sh cpupath)
if [[ $CPU_ARCH == "${EESSI_SOFTWARE_SUBDIR_OVERRIDE}" ]]; then
echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE PASSED"
else
echo "Test for picking up on \$EESSI_SOFTWARE_SUBDIR_OVERRIDE FAILED" >&2
exit 1
fi
unset EESSI_SOFTWARE_SUBDIR_OVERRIDE
# check that printing of all matches works correctly (-a option for cpupath action)
CPU_ARCHES=$(./init/eessi_archdetect.sh -a cpupath)
if [[ $CPU_ARCHES == "$( cat ./tests/archdetect/${{matrix.proc_cpuinfo}}.all.output )" ]]; then
echo "Test for ${{matrix.proc_cpuinfo}} PASSED: $CPU_ARCHES"
else
echo "Test for ${{matrix.proc_cpuinfo}} FAILED: $CPU_ARCHES" >&2
exit 1
fi
# Check all those architectures actually exist (if this EESSI version has been populated already)
if [ -d ${EESSI_PREFIX}/software/linux ]; then
for dir in $(echo "$CPU_ARCHES" | tr ':' '\n'); do
# Search all EESSI versions as we may drop support at some point
ls -d ${EESSI_PREFIX}/software/linux/${dir}
done
fi
6 changes: 3 additions & 3 deletions .github/workflows/tests_scripts.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on:
paths:
- build_container.sh
- create_directory_tarballs.sh
- EESSI-pilot-install-software.sh
- EESSI-install-software.sh
- install_software_layer.sh
- load_easybuild_module.sh
- run_in_compat_layer_env.sh
Expand All @@ -16,7 +16,7 @@ on:
paths:
- build_container.sh
- create_directory_tarballs.sh
- EESSI-pilot-install-software.sh
- EESSI-install-software.sh
- install_software_layer.sh
- load_easybuild_module.sh
- run_in_compat_layer_env.sh
Expand All @@ -40,7 +40,7 @@ jobs:
# bind current directory into container as /software-layer
export SINGULARITY_BIND="${PWD}:/software-layer"
# can't test with EasyBuild versions older than v4.5.2 when using EESSI pilot 2023.06,
# can't test with EasyBuild versions older than v4.5.2 when using EESSI 2023.06,
# since Python in compat layer is Python 3.11.x;
# testing with a single EasyBuild version takes a while in GitHub Actions, so stick to a single sensible version
for EB_VERSION in '4.6.0'; do
Expand Down
40 changes: 29 additions & 11 deletions EESSI-install-software.sh
Original file line number Diff line number Diff line change
Expand Up @@ -187,31 +187,46 @@ fi
# assume there's only one diff file that corresponds to the PR patch file
pr_diff=$(ls [0-9]*.diff | head -1)

# install any additional required scripts
# order is important: these are needed to install a full CUDA SDK in host_injections
# for now, this just reinstalls all scripts. Not the most elegant, but it works
${TOPDIR}/install_scripts.sh --prefix ${EESSI_PREFIX}

# Install full CUDA SDK in host_injections
# Hardcode this for now, see if it works
# TODO: We should make a nice yaml and loop over all CUDA versions in that yaml to figure out what to install
${EESSI_PREFIX}/scripts/gpu_support/nvidia/install_cuda_host_injections.sh -c 12.1.1 --accept-cuda-eula

# Install drivers in host_injections
# TODO: this is commented out for now, because the script assumes that nvidia-smi is available and works;
# if not, an error is produced, and the bot flags the whole build as failed (even when not installing GPU software)
# ${EESSI_PREFIX}/scripts/gpu_support/nvidia/link_nvidia_host_libraries.sh

# use PR patch file to determine in which easystack files stuff was added
changed_easystacks=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^easystacks/.*yml$' | egrep -v 'known-issues|missing')
if [ -z ${changed_easystacks} ]; then
echo "No missing installations, party time!" # Ensure the bot reports success, as there was nothing to be built here
else
for easystack_file in ${changed_easystacks}; do

echo -e "Processing easystack file ${easystack_file}...\n\n"

# determine version of EasyBuild module to load based on EasyBuild version included in name of easystack file
eb_version=$(echo ${easystack_file} | sed 's/.*eb-\([0-9.]*\).*/\1/g')

# load EasyBuild module (will be installed if it's not available yet)
source ${TOPDIR}/load_easybuild_module.sh ${eb_version}

${EB} --show-config

echo_green "All set, let's start installing some software with EasyBuild v${eb_version} in ${EASYBUILD_INSTALLPATH}..."

if [ -f ${easystack_file} ]; then
echo_green "Feeding easystack file ${easystack_file} to EasyBuild..."

${EB} --easystack ${TOPDIR}/${easystack_file} --robot
ec=$?

# copy EasyBuild log file if EasyBuild exited with an error
if [ ${ec} -ne 0 ]; then
eb_last_log=$(unset EB_VERBOSE; eb --last-log)
Expand All @@ -221,18 +236,21 @@ else
# copy to build logs dir (with context added)
copy_build_log "${eb_last_log}" "${build_logs_dir}"
fi

$TOPDIR/check_missing_installations.sh ${TOPDIR}/${easystack_file}
else
fatal_error "Easystack file ${easystack_file} not found!"
fi

done
fi

### add packages here

echo ">> Creating/updating Lmod cache..."
export LMOD_RC="${EASYBUILD_INSTALLPATH}/.lmod/lmodrc.lua"
if [ ! -f $LMOD_RC ]; then
lmodrc_changed=$(cat ${pr_diff} | grep '^+++' | cut -f2 -d' ' | sed 's@^[a-z]/@@g' | grep '^create_lmodrc.py$' > /dev/null; echo $?)
if [ ! -f $LMOD_RC ] || [ ${lmodrc_changed} == '0' ]; then
python3 $TOPDIR/create_lmodrc.py ${EASYBUILD_INSTALLPATH}
check_exit_code $? "$LMOD_RC created" "Failed to create $LMOD_RC"
fi
Expand Down
5 changes: 5 additions & 0 deletions bot/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,11 @@ mkdir -p ${TARBALL_TMP_BUILD_STEP_DIR}
# options for the build step: where to save the result, which storage to use,
# plus the options required to handle NVIDIA support
declare -a BUILD_STEP_ARGS=(
    "--save" "${TARBALL_TMP_BUILD_STEP_DIR}"
    "--storage" "${STORAGE}"
    "--nvidia" "all"
)
# only pass a host-injections location when a shared filesystem path is configured
if [[ -n ${SHARED_FS_PATH} ]]; then
    BUILD_STEP_ARGS+=("--host-injections" "${SHARED_FS_PATH}/host-injections")
fi

# prepare arguments to install_software_layer.sh (specific to build step)
declare -a INSTALL_SCRIPT_ARGS=()
Expand Down
1 change: 1 addition & 0 deletions bot/check-result.sh
20 changes: 20 additions & 0 deletions bot/check-test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
#
# Dummy script that only creates test result file for the bot, without actually checking anything
#
# This script is part of the EESSI software layer, see
# https://github.com/EESSI/software-layer.git
#
# author: Kenneth Hoste (HPC-UGent)
#
# license: GPLv2
#
# name of the test result file created for the bot (in the current working directory)
# NOTE(review): if SLURM_JOB_ID is unset the file is named "_bot_job.test" -
# presumably this script always runs inside a Slurm job; confirm against the caller
job_test_result_file="_bot_job${SLURM_JOB_ID}.test"

# write the whole [TEST] section with a single (quoted) redirection;
# this truncates any existing result file, exactly like the first '>' did before
{
    echo "[TEST]"
    echo "comment_description = <em>(no tests yet)</em>"
    echo "status = SUCCESS"
} > "${job_test_result_file}"

# nothing is actually checked, so always report success
exit 0
Loading

0 comments on commit 2bd8426

Please sign in to comment.