From e268ff6088aa6c7331f4ea5635d2dade7a256b33 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 26 Nov 2024 10:23:44 -0500 Subject: [PATCH 1/4] Run compute-sanitizer in CI checks compute-sanitizer can help discover subtle bugs that are otherwise hard to reproduce (such as the one solved in #1944). --- .github/workflows/build_and_test.yaml | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index b753f6422a..d1868f8ec4 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -282,6 +282,21 @@ jobs: env: _HOOMD_DISALLOW_CUPY_: 1 + ### compute-sanitizer checks: These checks are slow, so run them only when validation is + # requested, but not on the actual long validation tests. Every class's basic test should ensure + # that the GPU kernel is called for at least a few timesteps for these checks to be effective. + - name: Run memcheck + if: ${{ contains(inputs.config, 'cuda') && contains(github.event.pull_request.labels.*.name, 'validate') && inputs.validate == 'true' }} + run: compute-sanitizer --tool racecheck --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 + + - name: Run racecheck + if: ${{ contains(inputs.config, 'cuda') && contains(github.event.pull_request.labels.*.name, 'validate') && inputs.validate == 'true' }} + run: compute-sanitizer --tool racecheck --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 + + - name: Run synccheck + if: ${{ contains(inputs.config, 'cuda') && contains(github.event.pull_request.labels.*.name, 'validate') && inputs.validate == 'true' }} + run: compute-sanitizer --tool synccheck --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 + ### Validation tests - name: Run pytest -m validate (serial) if: ${{ !contains(inputs.config, 'mpi') && 
contains(github.event.pull_request.labels.*.name, 'validate') && inputs.validate == 'true' }} From afe4adbbdc79fe332493fa6d7cf5244987a93122 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 26 Nov 2024 13:41:08 -0500 Subject: [PATCH 2/4] Actually run memcheck. Co-authored-by: Tim Moore --- .github/workflows/build_and_test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml index d1868f8ec4..a8040f6ca1 100644 --- a/.github/workflows/build_and_test.yaml +++ b/.github/workflows/build_and_test.yaml @@ -287,7 +287,7 @@ jobs: # that the GPU kernel is called for at least a few timesteps for these checks to be effective. - name: Run memcheck if: ${{ contains(inputs.config, 'cuda') && contains(github.event.pull_request.labels.*.name, 'validate') && inputs.validate == 'true' }} - run: compute-sanitizer --tool racecheck --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 + run: compute-sanitizer --tool memcheck --error-exitcode 1 python3 -m pytest --pyargs hoomd -x -v -ra --durations=0 --durations-min=0.1 - name: Run racecheck if: ${{ contains(inputs.config, 'cuda') && contains(github.event.pull_request.labels.*.name, 'validate') && inputs.validate == 'true' }} From 21788306356ecceb377f0d618b04e8c6247a6b71 Mon Sep 17 00:00:00 2001 From: "Joshua A. 
Anderson" Date: Tue, 26 Nov 2024 14:37:32 -0500 Subject: [PATCH 3/4] Fix read after write hazard in mesh.conservation.Volume --- hoomd/md/VolumeConservationMeshForceComputeGPU.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu index 3c9b95ab26..24eeea1386 100644 --- a/hoomd/md/VolumeConservationMeshForceComputeGPU.cu +++ b/hoomd/md/VolumeConservationMeshForceComputeGPU.cu @@ -164,6 +164,7 @@ __global__ void gpu_volume_reduce_partial_sum_kernel(Scalar* d_sum, if (threadIdx.x < offs) volume_sdata[threadIdx.x] += volume_sdata[threadIdx.x + offs]; offs >>= 1; + __syncthreads(); } // everybody sums up sum2K From 7f22fd2fc72978576d91be7fedf53cd0b59c5f31 Mon Sep 17 00:00:00 2001 From: "Joshua A. Anderson" Date: Tue, 26 Nov 2024 14:40:33 -0500 Subject: [PATCH 4/4] Document mesh.conservation.volume fix. --- CHANGELOG.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 09ccef7d23..8447ceb11b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -24,6 +24,8 @@ Change Log * Read after write hazard in the GPU implementation of ``Dipole``, ``ALJ``, and all ``Patchy`` potentials in ``hoomd.md.pair.ansiso`` (`#1944 <https://github.com/glotzerlab/hoomd-blue/pull/1944>`__). +* Read after write hazard in the GPU implementation of ``hoomd.md.mesh.conservation.Volume`` + (`#1953 <https://github.com/glotzerlab/hoomd-blue/pull/1953>`__). *Added*