
Commit

adding routines for cleaning up distributed process groups (#50)
azrael417 authored Aug 29, 2024
1 parent 24fcb06 commit b2ce590
Showing 5 changed files with 22 additions and 4 deletions.
5 changes: 3 additions & 2 deletions Dockerfile
@@ -30,14 +30,15 @@
# build after cloning in directory torch_harmonics via
# docker build . -t torch_harmonics

FROM nvcr.io/nvidia/pytorch:24.07-py3
FROM nvcr.io/nvidia/pytorch:24.08-py3

COPY . /workspace/torch_harmonics

# we need this for tests
RUN pip install parameterized

# The custom CUDA extension does not support architectures < 7.0
ENV FORCE_CUDA_EXTENSION=1
ENV TORCH_CUDA_ARCH_LIST "7.0 7.2 7.5 8.0 8.6 8.7 9.0+PTX"
RUN pip install --global-option --cuda_ext /workspace/torch_harmonics
RUN cd /workspace/torch_harmonics && pip install --no-build-isolation .

7 changes: 6 additions & 1 deletion tests/test_distributed_convolution.py
@@ -112,6 +112,11 @@ def setUpClass(cls):
        # initializing sht
        thd.init(cls.h_group, cls.w_group)

    @classmethod
    def tearDownClass(cls):
        thd.finalize()
        dist.destroy_process_group(None)

    def _split_helper(self, tensor):
        with torch.no_grad():
            # split in W
@@ -185,7 +190,7 @@ def _gather_helper_bwd(self, tensor, B, C, convolution_dist):
[128, 256, 128, 256, 32, 8, [3], 2, "equiangular", "equiangular", False, 1e-5],
[128, 256, 128, 256, 32, 6, [3], 1, "equiangular", "equiangular", False, 1e-5],
[128, 256, 128, 256, 32, 8, [3], 1, "equiangular", "equiangular", True, 1e-5],
[129, 256, 128, 256, 32, 8, [3], 1, "equiangular", "equiangular", True, 1e-5],
[129, 256, 129, 256, 32, 8, [3], 1, "equiangular", "equiangular", True, 1e-5],
[128, 256, 128, 256, 32, 8, [3, 2], 1, "equiangular", "equiangular", True, 1e-5],
[64, 128, 128, 256, 32, 8, [3], 1, "equiangular", "equiangular", True, 1e-5],
[128, 256, 128, 256, 32, 8, [3], 2, "equiangular", "equiangular", True, 1e-5],
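For context, the tearDownClass hook added above (and, identically, in the SHT test suite below) mirrors the existing setUpClass hook. A minimal sketch of the resulting lifecycle, assuming a torchrun-style launch and placeholder dist.new_group calls in place of the real grid decomposition used by the tests:

import unittest
import torch.distributed as dist
import torch_harmonics.distributed as thd

class DistributedLifecycleSketch(unittest.TestCase):
    # hypothetical test class illustrating the init/finalize pairing; the real
    # tests derive cls.h_group and cls.w_group from their own grid decomposition
    @classmethod
    def setUpClass(cls):
        dist.init_process_group(backend="gloo")  # backend chosen only for illustration
        ranks = list(range(dist.get_world_size()))
        cls.h_group = dist.new_group(ranks=ranks)  # placeholder polar group
        cls.w_group = dist.new_group(ranks=ranks)  # placeholder azimuth group
        thd.init(cls.h_group, cls.w_group)

    @classmethod
    def tearDownClass(cls):
        thd.finalize()                    # release the polar/azimuth subgroups first
        dist.destroy_process_group(None)  # then tear down the remaining (default) group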
5 changes: 5 additions & 0 deletions tests/test_distributed_sht.py
@@ -118,6 +118,11 @@ def setUpClass(cls):
        # initializing sht
        thd.init(cls.h_group, cls.w_group)

    @classmethod
    def tearDownClass(cls):
        thd.finalize()
        dist.destroy_process_group(None)


    def _split_helper(self, tensor):
        with torch.no_grad():
2 changes: 1 addition & 1 deletion torch_harmonics/distributed/__init__.py
@@ -30,7 +30,7 @@
#

# we need this in order to enable distributed
from .utils import init, is_initialized, polar_group, azimuth_group
from .utils import init, finalize, is_initialized, polar_group, azimuth_group
from .utils import polar_group_size, azimuth_group_size, polar_group_rank, azimuth_group_rank
from .primitives import compute_split_shapes, split_tensor_along_dim
from .primitives import (
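With this change, finalize is exported alongside init and the query helpers, so user code can import the cleanup routine directly from the package. A short illustrative sketch (the setup and distributed work are assumed to have happened elsewhere):

from torch_harmonics.distributed import finalize, is_initialized

# ... init(polar_group, azimuth_group) and the distributed computation run elsewhere ...
if is_initialized():
    finalize()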
7 changes: 7 additions & 0 deletions torch_harmonics/distributed/utils.py
@@ -51,6 +51,13 @@ def init(polar_process_group, azimuth_process_group):
    _AZIMUTH_PARALLEL_GROUP = azimuth_process_group
    _IS_INITIALIZED = True

def finalize():
    if is_initialized():
        if is_distributed_polar():
            dist.destroy_process_group(_POLAR_PARALLEL_GROUP)
        if is_distributed_azimuth():
            dist.destroy_process_group(_AZIMUTH_PARALLEL_GROUP)

def is_initialized() -> bool:
    return _IS_INITIALIZED

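Note that finalize only destroys the polar and azimuth subgroups registered by init (and only when those directions are actually distributed); the default group created by dist.init_process_group is left untouched, which is why the updated tests follow it with dist.destroy_process_group(None). A minimal shutdown sketch under that assumption:

import torch.distributed as dist
import torch_harmonics.distributed as thd

# assumes dist.init_process_group(...) and thd.init(...) were called earlier
thd.finalize()                    # drop the polar/azimuth subgroups
dist.destroy_process_group(None)  # destroy the default group and anything left over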
