-
Notifications
You must be signed in to change notification settings - Fork 3
204 lines (202 loc) · 7.56 KB
/
apptainer_exachem.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
# Builds, publishes and smoke-tests the ExaChem Apptainer images.
name: exachem_apptainer

on:
  # Weekly rebuild at 00:00 every Sunday (GitHub evaluates cron in UTC).
  schedule:
    - cron: "0 0 * * SUN"
  # External trigger through the repository_dispatch API.
  repository_dispatch:
    types:
      - backend_automation
  # Manual runs.
  workflow_dispatch:
  # Allows other workflows to call this one as a reusable workflow.
  workflow_call:
jobs:
  # Builds one Apptainer image per matrix combination.
  build_image:
    runs-on: ubuntu-22.04
    strategy:
      # Let the remaining combinations finish even when one of them fails.
      fail-fast: false
      matrix:
        folder:
          - apptainer.ompi41x
        branch:
          - main
        armci_network:
          - MPI-PR
        # Quoted on purpose: a bare N / Y is a boolean for YAML 1.1 loaders,
        # while the steps below compare this value with the literal string "Y".
        elpa:
          - 'N'
        gpu:
          - cpu
          - nvidia_70
          - nvidia_80
          - nvidia_90
          - amd_gfx908_rocm5.5.1
          - amd_gfx90a_rocm5.5.1
          - amd_gfx908_rocm6.0
          - amd_gfx90a_rocm6.0
          - amd_gfx908_rocm5.7.3
          - amd_gfx90a_rocm5.7.3
        # Additional ELPA-enabled combinations for the NVIDIA targets.  Every
        # key is spelled out because `elpa: 'Y'` conflicts with the base matrix
        # value, which makes each entry a brand-new combination.
        include:
          - gpu: nvidia_70
            armci_network: MPI-PR
            folder: apptainer.ompi41x
            branch: main
            elpa: 'Y'
          - gpu: nvidia_80
            armci_network: MPI-PR
            folder: apptainer.ompi41x
            branch: main
            elpa: 'Y'
          - gpu: nvidia_90
            armci_network: MPI-PR
            folder: apptainer.ompi41x
            branch: main
            elpa: 'Y'
    steps:
# Downloads the NWChem SIMD-detection helper, runs it and exports its output
# as the `simd` environment variable for the following steps.
- name: Get Instruction
  id: get-simd
  run: |
    /usr/bin/lscpu
    # --fail makes curl exit non-zero on an HTTP error, so an error page is
    # never saved under the script's name and then executed.
    curl --fail -LJO https://raw.githubusercontent.com/nwchemgit/nwchem/master/travis/guess_simd.sh
    chmod +x guess_simd.sh
    echo "simd=$(./guess_simd.sh)" >> "$GITHUB_ENV"
# Derives the image-name fragments (exachemversion, armci_network, elpa) that
# the "tag" step assembles into the registry reference.
- name: exachem version tag
  run: |
    # Branch fragment: empty on main, "-<branch>" on any other branch.
    case ${{ matrix.branch }} in
      main) version_suffix="" ;;
      *) version_suffix=-${{ matrix.branch }} ;;
    esac
    # GPU fragment: appended for every target except nvidia_70.
    case ${{ matrix.gpu }} in
      nvidia_70) ;;
      *) version_suffix+=.${{ matrix.gpu }} ;;
    esac
    echo "exachemversion=$version_suffix" >> $GITHUB_ENV
    # Network fragment: lower-cased name plus a trailing dot; skipped for MPI-PR.
    case ${{ matrix.armci_network }} in
      MPI-PR) ;;
      *)
        net_lower=$(echo ${{ matrix.armci_network }} | tr '[:upper:]' '[:lower:]')
        echo "armci_network=${net_lower}." >> $GITHUB_ENV
        ;;
    esac
    # ELPA fragment: only exported for ELPA-enabled builds.
    case ${{ matrix.elpa }} in
      Y) echo "elpa=elpa." >> $GITHUB_ENV ;;
    esac
# Assembles the ORAS reference of the image (without the version tag) and
# exports it as `tag`.
- name: tag
  run: |
    image_ref="oras://ghcr.io/${{ github.repository }}/apptainer${{ env.exachemversion }}.${{ env.armci_network }}${{ env.elpa }}ompi41x"
    echo "tag=${image_ref}" >> $GITHUB_ENV
# Writes the computed image reference to the job log.
- name: echo-tag
  run: |
    printf '%s %s\n' ' the apptainer tag is ' '${{ env.tag }}'
# Checks out this repository; the build step later enters the matrix folder
# inside it to find the image definition.
- name: Checkout image source
  uses: actions/checkout@v4
  with:
    # Skip the `git clean` / `git reset` that checkout performs by default, so
    # files already present in the workspace are kept.
    clean: false
# Frees disk space on the runner by purging large preinstalled packages.
- name: pkg cleanup
  run: |
    # Purge only those of the given packages that are currently installed.
    # `apt-get purge` aborts without removing anything when one of its
    # arguments is unknown to apt, and with `shell: bash` the script runs under
    # `-e`, so a change in the runner image must neither fail the job nor
    # silently skip the rest of the cleanup.
    purge_installed() {
      local pkg present=()
      for pkg in "$@"; do
        if dpkg-query -W -f='${Status}' "$pkg" 2>/dev/null | grep -q 'install ok installed'; then
          present+=("$pkg")
        fi
      done
      if (( ${#present[@]} > 0 )); then
        sudo apt-get purge -y "${present[@]}" || true
      fi
    }

    df -h
    if [[ $(uname -s) == "Linux" ]]; then
      ubuntu_ver=$(grep VERSION_ID /etc/os-release | cut -d \" -f 2)
      # Installed packages sorted by size, before the cleanup.
      dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n
      purge_installed azure-cli
      purge_installed google-cloud-cli microsoft-edge-stable dotnet-sdk-7.0 dotnet-sdk-6.0 google-chrome-stable firefox
      purge_installed temurin-17-jdk temurin-11-jdk temurin-8-jdk
      if [[ $ubuntu_ver == "20.04" ]]; then
        purge_installed llvm-12-dev llvm-11-dev llvm-10-dev
        purge_installed hhvm
        purge_installed libgl1-mesa-dri
      fi
      if [[ $ubuntu_ver == "22.04" ]]; then
        purge_installed llvm-13-dev llvm-14-dev llvm-15-dev
      fi
      sudo apt-get -y clean
      sudo apt-get autoremove -y
      # Same listing after the cleanup, for comparison in the log.
      dpkg-query -Wf '${Installed-Size}\t${Package}\n' | sort -n
      df -h
    fi
  shell: bash
# Restores ~/cache from the Actions cache; the "store cache" step further down
# fills it with the Open MPI tree and the libint tarball.
- name: Setup cache
  id: setup-cache
  uses: actions/cache@v4
  with:
    path: |
      ~/cache
    # One cache entry per image folder and version fragment.
    key: ${{ matrix.folder }}-${{ env.exachemversion }}-exachemcache-v001
# On a cache hit, lists the restored directory and reports which artefacts
# are present.
- name: fetch cache
  if: steps.setup-cache.outputs.cache-hit == 'true'
  run: |
    ls -lart ~/cache
    if [[ -f ~/cache/ompi/lib/libmpi.so ]]; then
      echo "ompi cache present"
    fi
    if [[ -f ~/cache/libint.tar.bz2 ]]; then
      echo "libint cache present"
    fi
# Installs Apptainer from the upstream release .deb, after installing the
# distribution packages listed below.
- name: Install apptainer
  env:
    # Quoted so the version always stays a string.
    APP_VER: "1.2.3"
  run: |
    # Refresh the package index first: the one shipped in the runner image can
    # reference package versions that are no longer on the mirrors, which makes
    # `apt-get install` fail with 404 errors.
    sudo apt-get update
    sudo apt-get install -y libfuse2 libseccomp2 zlib1g uidmap squashfs-tools squashfuse fuse2fs fuse-overlayfs fakeroot
    wget -q https://github.com/apptainer/apptainer/releases/download/v${APP_VER}/apptainer_${APP_VER}_amd64.deb
    sudo dpkg -i apptainer_${APP_VER}_amd64.deb
    # Log where the binary ended up and which version is active.
    which apptainer
    apptainer version
# Checks out the ExaChem sources at the matrix branch into ./exachem.
- name: Checkout exachem source
  uses: actions/checkout@v4
  with:
    repository: Exachem/exachem
    ref: ${{ matrix.branch }}
    path: exachem
    # 0 = fetch the complete history instead of a shallow clone.
    fetch-depth: 0
    clean: false
# Builds <folder>.simg from the `Singularity` definition file located in the
# matrix folder.
- name: build an apptainer container
  run: |
    cd ${{ matrix.folder }}
    # The build settings are handed to `apptainer build` through its environment.
    MYUSERNAME=${{ github.actor }} \
    BRANCH=${{ matrix.branch }} \
    GPU=${{ matrix.gpu }} \
    ARMCI_NETWORK=${{ matrix.armci_network }} \
    ELPA=${{ matrix.elpa }} \
    MPI_IMPL=ompi \
    apptainer build --fakeroot ${{ matrix.folder }}.simg Singularity
# Exports a YYYYMMDD_HHMMSS timestamp (America/Los_Angeles time) as `date`;
# the push step uses it as the image version tag.
- name: builddate
  id: build-date
  run: |
    build_stamp=$(TZ=America/Los_Angeles date +%Y%m%d_%H%M%S)
    echo "date=${build_stamp}" >> $GITHUB_ENV
# Publishes the image to ghcr.io under the timestamp tag and under `latest`,
# and records the outcome in `push_truefalse`, which gates the later steps.
- name: push to ghcr.io
  env:
    # Passed through the environment rather than pasted into the script text.
    GHCR_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  run: |
    echo "$GHCR_TOKEN" | apptainer remote login -u ${{ github.actor }} --password-stdin oras://ghcr.io
    apptainer remote list
    # A failed push must not abort the step; it only flips the flag.  A plain
    # boolean is used instead of accumulating exit codes, since `var+=$?` on a
    # shell string appends digits rather than adding numbers.
    push_ok=true
    apptainer push ${{ matrix.folder }}/${{ matrix.folder }}.simg ${{ env.tag }}:${{ env.date }} || push_ok=false
    apptainer push ${{ matrix.folder }}/${{ matrix.folder }}.simg ${{ env.tag }}:latest || push_ok=false
    echo "push_truefalse=${push_ok}" >> "$GITHUB_ENV"
# After a successful push, copies the Open MPI tree and a tarball of the
# libint installation out of the image into ~/cache.
- name: store cache
  if: ${{ env.push_truefalse == 'true' }}
  run: |
    mkdir -p ~/cache || true
    cd ${{ matrix.folder }}
    image=${{ matrix.folder }}.simg
    prefix=/opt/install/exachem
    apptainer exec $image cp -r /opt/ompi ~/cache
    # Informational listings only; they may fail without failing the step.
    apptainer exec $image ls -l $prefix/include || true
    apptainer exec $image ls -l $prefix || true
    apptainer exec $image tar cjf ~/cache/libint.tar.bz2 \
      $prefix/include/libint2/ \
      $prefix/include/libint2.hpp \
      $prefix/include/libint2.h \
      $prefix/share/libint \
      $prefix/lib/cmake/libint2 \
      $prefix/lib/libint2.a \
      $prefix/lib/pkgconfig/libint2.pc
    echo "cache stored"
    ls -l ~/cache
# After a successful push, pulls the image back from the registry and prints
# the shared-library dependencies of the host mpirun and of the ExaChem binary
# inside the image.
- name: ldd check
  if: ${{ env.push_truefalse == 'true' }}
  run: |
    pwd
    ls -la
    apptainer pull -F ${{ env.tag }}:${{ env.date }}
    export UCX_TLS=tcp,self
    export UCX_POSIX_USE_PROC_LINK=n
    export MPIRUN_NPOPT="-x UCX_POSIX_USE_PROC_LINK=n -x UCX_TLS=tcp,self -n"
    # Refresh the package index so the install below does not depend on the
    # index shipped in the runner image still matching the mirrors.
    sudo apt-get update
    sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
    which mpirun
    ldd "$(which mpirun)"
    # NOTE(review): this inspects the `latest` tag while the pull above fetched
    # the timestamp tag - confirm that checking both tags is intended.
    apptainer exec ${{ env.tag }}:latest ldd /opt/install/exachem/bin/ExaChem
# CPU images only: runs ExaChem from the pushed image on two MPI ranks with
# the inputs/waterdft.json input.
- name: test cpu image with apptainer
  if: ${{ (env.push_truefalse == 'true') && (matrix.gpu == 'cpu') }}
  run: |
    pwd
    ls -la
    image_ref=${{ env.tag }}:${{ env.date }}
    apptainer pull -F $image_ref
    export UCX_TLS=tcp,self
    export UCX_POSIX_USE_PROC_LINK=n
    sudo apt-get install -y openmpi-bin openmpi-common libopenmpi-dev
    which mpirun
    ldd "$(which mpirun)"
    # The single-copy setting is given twice: once for the host mpirun and,
    # via the APPTAINERENV_ prefix, once for the processes inside the container.
    APPTAINERENV_OMPI_MCA_btl_vader_single_copy_mechanism=none \
    OMPI_MCA_btl_vader_single_copy_mechanism=none \
      mpirun -np 2 apptainer exec $image_ref \
        /opt/install/exachem/bin/ExaChem \
        inputs/waterdft.json