diff --git a/.github/workflows/docker-devito.yml b/.github/workflows/docker-devito.yml index a90c362f85..da60249c17 100644 --- a/.github/workflows/docker-devito.yml +++ b/.github/workflows/docker-devito.yml @@ -20,26 +20,26 @@ jobs: include: - base: 'bases:nvidia-nvc' tag: 'nvidia-nvc' - flag: '--gpus all' + flag: '--init --gpus all' test: 'tests/test_gpu_openacc.py tests/test_gpu_common.py' runner: ["self-hosted", "nvidiagpu"] # Runtime gpu flags from https://hub.docker.com/r/rocm/tensorflow/ - base: 'bases:amd' tag: 'amd' - flag: '--network=host --device=/dev/kfd --device=/dev/dri --ipc=host --group-add video --group-add $(getent group render | cut -d: -f3) --cap-add=SYS_PTRACE --security-opt seccomp=unconfined' + flag: '--init --network=host --device=/dev/kfd --device=/dev/dri --ipc=host --group-add video --group-add $(getent group render | cut -d: -f3) --cap-add=SYS_PTRACE --security-opt seccomp=unconfined' test: 'tests/test_gpu_openmp.py' runner: ["self-hosted", "amdgpu"] - base: 'bases:cpu-gcc' tag: "gcc" - flag: '' + flag: '--init -t' test: 'tests/test_operator.py' runner: ubuntu-latest - base: 'bases:cpu-icx' tag: "icx" - flag: '' + flag: '--init -t' test: 'tests/test_operator.py' runner: ubuntu-latest diff --git a/.github/workflows/pytest-core-mpi.yml b/.github/workflows/pytest-core-mpi.yml index a5d2354e33..4f2853e46e 100644 --- a/.github/workflows/pytest-core-mpi.yml +++ b/.github/workflows/pytest-core-mpi.yml @@ -87,9 +87,9 @@ jobs: - name: Test with pytest run: | - docker run --rm -t -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} -e OMP_NUM_THREADS=1 --name testrun devito_img pytest tests/test_mpi.py + docker run --init -t --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} -e OMP_NUM_THREADS=1 --name testrun devito_img pytest tests/test_mpi.py - name: Test examples with MPI run: | - docker run --rm -t ${{ matrix.mpiflag }} -e DEVITO_MPI=1 -e OMP_NUM_THREADS=1 --name examplerun devito_img mpiexec -n 2 pytest examples/seismic/acoustic - docker run --rm -t 
-e DEVITO_MPI=1 -e OMP_NUM_THREADS=1 --name examplerun devito_img mpiexec -n 2 pytest examples/seismic/tti \ No newline at end of file + docker run --init -t --rm ${{ matrix.mpiflag }} -e DEVITO_MPI=1 -e OMP_NUM_THREADS=1 --name examplerun devito_img mpiexec -n 2 pytest examples/seismic/acoustic + docker run --init -t --rm -e DEVITO_MPI=1 -e OMP_NUM_THREADS=1 --name examplerun devito_img mpiexec -n 2 pytest examples/seismic/tti \ No newline at end of file diff --git a/.github/workflows/pytest-core-nompi.yml b/.github/workflows/pytest-core-nompi.yml index 6e9df39498..54b5b05b1f 100644 --- a/.github/workflows/pytest-core-nompi.yml +++ b/.github/workflows/pytest-core-nompi.yml @@ -134,7 +134,7 @@ jobs: - name: Set run prefix run: | if [[ "${{ matrix.name }}" =~ "docker" ]]; then - echo "RUN_CMD=docker run --rm -t -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} --name testrun devito_img" >> $GITHUB_ENV + echo "RUN_CMD=docker run --init -t --rm -e CODECOV_TOKEN=${{ secrets.CODECOV_TOKEN }} --name testrun devito_img" >> $GITHUB_ENV else echo "RUN_CMD=" >> $GITHUB_ENV fi diff --git a/.github/workflows/pytest-gpu.yml b/.github/workflows/pytest-gpu.yml index 496841d688..a387a9c357 100644 --- a/.github/workflows/pytest-gpu.yml +++ b/.github/workflows/pytest-gpu.yml @@ -57,7 +57,7 @@ jobs: base: "devitocodes/bases:nvidia-nvc" tags: ["self-hosted", "nvidiagpu"] test_drive_cmd: "nvidia-smi" - flags: '--gpus all --rm -t --name testrun-nvc' + flags: '--init --gpus all --rm -t --name testrun-nvc' - name: pytest-gpu-omp-amd test_files: "tests/test_adjoint.py tests/test_gpu_common.py tests/test_gpu_openmp.py" @@ -66,7 +66,7 @@ jobs: test_drive_cmd: "rocm-smi" # Attach the AMD GPU devices `/dev` and add user to video and render (109 on wampa) group # Options from https://rocmdocs.amd.com/en/latest/ROCm_Virtualization_Containers/ROCm-Virtualization-&-Containers.html - flags: "--network=host --device=/dev/kfd --device=/dev/dri --ipc=host --group-add video --group-add $(getent group 
render | cut -d: -f3) --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --rm -t --name testrun-amd" + flags: "--init --network=host --device=/dev/kfd --device=/dev/dri --ipc=host --group-add video --group-add $(getent group render | cut -d: -f3) --cap-add=SYS_PTRACE --security-opt seccomp=unconfined --rm -t --name testrun-amd" steps: - name: Checkout devito diff --git a/.github/workflows/tutorials.yml b/.github/workflows/tutorials.yml index 292ae9bec8..9b9bb7f8ed 100644 --- a/.github/workflows/tutorials.yml +++ b/.github/workflows/tutorials.yml @@ -75,7 +75,7 @@ jobs: - name: Set run prefix run: | if [ "${{ matrix.name }}" == 'tutos-docker-gcc-py39' ]; then - echo "RUN_CMD=docker run --rm -t --name testrun devito_img" >> $GITHUB_ENV + echo "RUN_CMD=docker run --init -t --rm --name testrun devito_img" >> $GITHUB_ENV else echo "RUN_CMD=" >> $GITHUB_ENV fi diff --git a/devito/arch/archinfo.py b/devito/arch/archinfo.py index 42f0349b27..7dbfc420ce 100644 --- a/devito/arch/archinfo.py +++ b/devito/arch/archinfo.py @@ -270,7 +270,7 @@ def cbk(deviceid=0): for line in lines: if 'GPU' in line: # Product - pattern = r'GPU\[(\d+)\].*?Card series:\s*(.*?)\s*$' + pattern = r'GPU\[(\d+)\].*?Card [sS]eries:\s*(.*?)\s*$' match1 = re.match(pattern, line) if match1: @@ -280,7 +280,7 @@ def cbk(deviceid=0): gpu_infos[gid]['product'] = match1.group(2) # Model - pattern = r'GPU\[(\d+)\].*?Card model:\s*(.*?)\s*$' + pattern = r'GPU\[(\d+)\].*?Card [mM]odel:\s*(.*?)\s*$' match2 = re.match(pattern, line) if match2: diff --git a/docker/Dockerfile.amd b/docker/Dockerfile.amd index cb42f8e4b0..2478fbba18 100644 --- a/docker/Dockerfile.amd +++ b/docker/Dockerfile.amd @@ -3,12 +3,12 @@ # Based on https://github.com/amd/InfinityHub-CI/tree/main/base-gpu-mpi-rocm-docker ############################################################## -ARG ROCM_VERSION=5.5.1 +ARG ROCM_VERSION=6.3.2 FROM rocm/dev-ubuntu-22.04:${ROCM_VERSION}-complete as sdk-base -ARG UCX_BRANCH="v1.13.1" -ARG 
OMPI_BRANCH="v4.1.4" +ARG UCX_BRANCH="v1.14.1" +ARG OMPI_BRANCH="v5.0.6" # Update and Install basic Linux development tools RUN rm /etc/apt/sources.list.d/* \ @@ -46,7 +46,7 @@ ENV ROCM_HOME=/opt/rocm \ OMPI_HOME=/opt/ompi # Until rocm base has it fixed -RUN ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch +RUN ln -s /opt/rocm/llvm/bin/offload-arch /opt/rocm/bin/offload-arch || echo "offload-arch already exists" # Install tmpi RUN curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi @@ -73,10 +73,14 @@ RUN cd /tmp/ \ --without-knem \ --without-xpmem \ --without-cuda \ + --without-java \ + --enable-mt \ --enable-optimizations \ --disable-logging \ --disable-debug \ --disable-examples \ + --disable-assertions \ + --disable-params-check \ && make -j ${nproc} \ && make install @@ -87,17 +91,22 @@ RUN cd /tmp \ && ./autogen.pl \ && mkdir build \ && cd build \ - && ../configure --prefix=$OMPI_HOME --with-ucx=$UCX_HOME \ - CC=amdclang CXX=amdclang++ FC=amdflang F90=amdflang \ - --enable-mca-no-build=btl-uct \ + && ../configure CC=amdclang CXX=amdclang++ FC=amdflang F90=amdflang \ + --prefix=$OMPI_HOME \ + --with-ucx=$UCX_HOME \ + --with-rocm=$ROCM_HOME \ + --enable-mca-no-build=btl-uct \ --without-verbs \ - --with-pmix \ - --enable-mpi \ - --enable-mpi-fortran=yes \ + --enable-mpi1-compatibility \ + --enable-mpi-fortran=no \ --disable-debug \ && make -j ${nproc} \ && make install +# UCX config +ENV UCX_WARN_UNUSED_ENV_VARS=n +ENV UCX_TLS=sm,self,rocm,rocm_copy,rocm_ipc + # Cleanup RUN rm -rf /tmp/ucx && rm -rf /tmp/ompi diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index bed0bbad24..3b2ae714c7 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -18,7 +18,7 @@ RUN apt-get update && \ # Install for basic base not containing it RUN apt-get install -y vim wget git flex libnuma-dev tmux \ numactl hwloc curl \ - autoconf libtool build-essential procps + autoconf libtool build-essential procps
software-properties-common # Install tmpi RUN curl https://raw.githubusercontent.com/Azrael3000/tmpi/master/tmpi -o /usr/local/bin/tmpi @@ -37,6 +37,12 @@ CMD ["/bin/bash"] ############################################################## FROM base as gcc +# Install gcc 13 for better hardware and software support +RUN add-apt-repository ppa:ubuntu-toolchain-r/test -y && apt-get update && \ + apt-get install gcc-13 g++-13 -y && \ + update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-13 100 && \ + update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-13 100 + ARG OMPI_BRANCH="v4.1.4" # Install OpenMPI RUN mkdir -p /deps && mkdir -p /opt/openmpi && cd /deps && \ @@ -47,7 +53,7 @@ RUN mkdir -p /deps && mkdir -p /opt/openmpi && cd /deps && \ --enable-mca-no-build=btl-uct --enable-mpi1-compatibility && \ make -j ${nproc} && \ make install && \ - rm -rf /deps/openmpi + cd /deps && rm -rf /deps/openmpi # Set OpenMPI path ENV PATH=${PATH}:/opt/openmpi/bin