Caching improvements #3989

Open
wants to merge 27 commits into base: master
Changes from 22 commits

Commits (27)
8d413cc
Caching improvements
connorjward Jan 23, 2025
5ed1a91
always spit out log files
connorjward Jan 23, 2025
8deaead
Use a timeout method that preserves more information
connorjward Jan 24, 2025
4d07082
more cleaning
connorjward Jan 24, 2025
8631b8c
fixupgs
connorjward Jan 24, 2025
d297a7e
fixup
connorjward Jan 24, 2025
13168c0
Try and avoid race conditions
connorjward Jan 28, 2025
fb57af4
Use strict SPMD behaviour to try and track these down. Also change be…
connorjward Jan 28, 2025
84a8ad7
Refactor parallel_cache decorator
connorjward Jan 28, 2025
b8235f8
Apply suggestions from code review
connorjward Jan 28, 2025
6a869ae
experimenting
connorjward Jan 28, 2025
37ae507
fixup
connorjward Jan 28, 2025
845b01c
debugging
connorjward Jan 29, 2025
ed4ef2b
-s to build
connorjward Jan 29, 2025
318cfd4
more print
connorjward Jan 29, 2025
4c5bbe4
improvements, hopefully fixed?
connorjward Jan 29, 2025
2dd2da6
avoid race conditions, does this fix things?
connorjward Jan 29, 2025
1aed273
Merge branch 'master' into connorjward/more-cache-fixes
connorjward Jan 29, 2025
ef5d03b
Merge remote-tracking branch 'origin/master' into connorjward/more-ca…
connorjward Jan 29, 2025
5c1b9f5
Add extra SPMD_STRICT check
connorjward Jan 30, 2025
1da4d5c
Is is not fixed?
connorjward Jan 30, 2025
81e8cb4
Fix bad hashing
connorjward Jan 31, 2025
22054ed
Point to FIAT branch
connorjward Jan 31, 2025
6592cdf
linting
connorjward Jan 31, 2025
2507977
Fix hashing for interface kwarg
connorjward Feb 6, 2025
e914120
Fix caching for patch
connorjward Feb 7, 2025
60c9c0e
Apply suggestions from code review
connorjward Feb 7, 2025
3 changes: 2 additions & 1 deletion .github/workflows/build.yml
@@ -48,6 +48,7 @@ jobs:
COMPLEX: ${{ matrix.complex }}
RDMAV_FORK_SAFE: 1
EXTRA_PYTEST_ARGS: --splitting-algorithm least_duration --timeout=1800 --timeout-method=thread -o faulthandler_timeout=1860 tests/firedrake
PYOP2_SPMD_STRICT: 1
steps:
- uses: actions/checkout@v4

@@ -95,7 +96,7 @@ jobs:
sudo apt update
sudo apt -y install parallel
. ../firedrake_venv/bin/activate
python "$(which firedrake-clean)"
firedrake-clean
python -m pip install pytest-timeout ipympl pytest-split pytest-xdist
python -m pip list

4 changes: 3 additions & 1 deletion .github/workflows/pip-mac.yml
@@ -24,6 +24,7 @@ jobs:
if: ${{ (github.ref == 'refs/heads/master') || contains(github.event.pull_request.labels.*.name, 'macOS') }}
env:
FIREDRAKE_CI_TESTS: 1
PYOP2_SPMD_STRICT: 1
OMP_NUM_THREADS: 1
OPENBLAS_NUM_THREADS: 1
steps:
@@ -98,8 +99,9 @@ jobs:
- name: Run Firedrake smoke tests
run: |
source pip_venv/bin/activate
firedrake-clean
cd pip_venv/src/firedrake
make check CHECK_PYTEST_ARGS="--timeout 60"
make check CHECK_PYTEST_ARGS="--timeout 60 --timeout-method=thread"
timeout-minutes: 10

- name: Cleanup (post)
4 changes: 3 additions & 1 deletion .github/workflows/pip.yml
@@ -38,6 +38,7 @@ jobs:
# PETSC_DIR, HDF5_DIR and MPICH_DIR are set inside the docker image
FIREDRAKE_CI_TESTS: 1
PYOP2_CI_TESTS: 1
PYOP2_SPMD_STRICT: 1
PETSC_ARCH: ${{ matrix.petsc_arch }}
OMP_NUM_THREADS: 1
OPENBLAS_NUM_THREADS: 1
@@ -82,8 +83,9 @@ jobs:
- name: Run Firedrake smoke tests
run: |
source pip_venv/bin/activate
firedrake-clean
cd pip_venv/src/firedrake
make check CHECK_PYTEST_ARGS="--timeout 60"
make check CHECK_PYTEST_ARGS="--timeout 60 --timeout-method=thread"
timeout-minutes: 10

- name: Publish Test Report
5 changes: 4 additions & 1 deletion firedrake/functionspaceimpl.py
@@ -876,7 +876,7 @@ class RestrictedFunctionSpace(FunctionSpace):
def __init__(self, function_space, boundary_set=frozenset(), name=None):
label = ""
boundary_set_ = []
for boundary_domain in boundary_set:
for boundary_domain in sorted(boundary_set, key=str):
if isinstance(boundary_domain, str):
boundary_set_.append(boundary_domain)
else:
@@ -885,6 +885,9 @@ def __init__(self, function_space, boundary_set=frozenset(), name=None):
bd, = as_tuple(boundary_domain)
boundary_set_.append(bd)
boundary_set = boundary_set_

# NOTE: boundary_set must be deterministically ordered here to ensure
# consistency between ranks
for boundary_domain in boundary_set:
label += str(boundary_domain)
label += "_"
2 changes: 1 addition & 1 deletion firedrake/slate/slac/compiler.py
@@ -90,7 +90,7 @@ def _compile_expression_comm(*args, **kwargs):

@memory_and_disk_cache(
hashkey=_compile_expression_hashkey,
comm_fetcher=_compile_expression_comm,
comm_getter=_compile_expression_comm,
cachedir=tsfc_interface._cachedir
)
@PETSc.Log.EventDecorator()
34 changes: 18 additions & 16 deletions firedrake/tsfc_interface.py
@@ -7,7 +7,7 @@
from os import path, environ, getuid, makedirs
import tempfile
import collections
import cachetools

Check failure on line 10 in firedrake/tsfc_interface.py (GitHub Actions / Run linter): firedrake/tsfc_interface.py:10:1: F401 'cachetools' imported but unused

import ufl
import finat.ufl
@@ -54,19 +54,23 @@


def tsfc_compile_form_hashkey(form, prefix, parameters, interface, diagonal):
# Drop prefix as it's only used for naming
return default_parallel_hashkey(form.signature(), prefix, parameters, interface, diagonal)
return default_parallel_hashkey(
form.signature(),
prefix,
utils.tuplify(parameters),
type(interface).__name__,
diagonal,
)
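
The new hashkey runs parameters through `utils.tuplify` so that dict-valued compilation parameters become hashable, and hashes the interface by class name rather than by object identity. A rough sketch of the kind of helper `tuplify` is assumed to be (the real `utils.tuplify` may differ):

    def tuplify_sketch(params):
        # Hypothetical stand-in for utils.tuplify: recursively convert
        # dicts and lists to tuples so the value can appear in a cache key.
        if isinstance(params, dict):
            return tuple((k, tuplify_sketch(v)) for k, v in sorted(params.items()))
        if isinstance(params, (list, tuple)):
            return tuple(tuplify_sketch(v) for v in params)
        return params

    tuplify_sketch({"mode": "spectral", "opts": {"unroll": True}})
    # -> (('mode', 'spectral'), ('opts', (('unroll', True),)))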


def _compile_form_comm(*args, **kwargs):
# args[0] is a form
return args[0].ufl_domains()[0].comm
def _compile_form_comm(form, *args, **kwargs):
return form.ufl_domains()[0].comm


# Decorate the original tsfc.compile_form with a cache
tsfc_compile_form = memory_and_disk_cache(
hashkey=tsfc_compile_form_hashkey,
comm_fetcher=_compile_form_comm,
comm_getter=_compile_form_comm,
cachedir=_cachedir
)(original_tsfc_compile_form)

@@ -133,23 +137,21 @@
SplitKernel = collections.namedtuple("SplitKernel", ["indices", "kinfo"])


def _compile_form_hashkey(*args, **kwargs):
    # form, name, parameters, split, diagonal
    parameters = kwargs.pop("parameters", None)
    key = cachetools.keys.hashkey(
        args[0].signature(),
        *args[1:],
        utils.tuplify(parameters),
        **kwargs
    )
    kwargs.setdefault("parameters", parameters)
    return key
def _compile_form_hashkey(form, name, parameters=None, split=True, interface=None, diagonal=False):
    return (
        form.signature(),
        name,
        utils.tuplify(parameters),
        split,
        type(interface).__name__,
        diagonal,
    )
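
The move from `*args, **kwargs` to an explicit signature matters because a generic hashkey treats positional and keyword spellings of the same call as different keys; this is plausibly the issue behind the "Fix hashing for interface kwarg" commit. A sketch of the failure mode, using hypothetical argument values:

    from cachetools.keys import hashkey

    # Old style: the key depends on how the caller spells the call.
    assert hashkey("sig", "name", True) != hashkey("sig", "name", split=True)

    # New style: binding to named parameters first normalises both spellings.
    def new_style_key(signature, name, parameters=None, split=True,
                      interface=None, diagonal=False):
        return (signature, name, split, type(interface).__name__, diagonal)

    assert new_style_key("sig", "name", None, False) == \
        new_style_key("sig", "name", split=False)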


@PETSc.Log.EventDecorator()
@memory_and_disk_cache(
hashkey=_compile_form_hashkey,
comm_fetcher=_compile_form_comm,
comm_getter=_compile_form_comm,
cachedir=_cachedir
)
@PETSc.Log.EventDecorator()
191 changes: 81 additions & 110 deletions pyop2/caching.py
@@ -52,6 +52,7 @@
from pyop2.mpi import (
MPI, COMM_WORLD, comm_cache_keyval, temp_internal_comm
)
import pytools
from petsc4py import PETSc


@@ -365,7 +366,7 @@ def write(self, filehandle, value):
pickle.dump(value, filehandle)


def default_comm_fetcher(*args, **kwargs):
def default_comm_getter(*args, **kwargs):
""" A sensible default comm fetcher for use with `parallel_cache`.
"""
comms = filter(
@@ -440,119 +441,89 @@ class DEFAULT_CACHE(dict):
DictLikeDiskAccess = instrument(DictLikeDiskAccess)


if configuration["spmd_strict"]:
def parallel_cache(
hashkey=default_parallel_hashkey,
comm_fetcher=default_comm_fetcher,
cache_factory=lambda: DEFAULT_CACHE(),
):
"""Parallel cache decorator (SPMD strict-enabled).
"""
def decorator(func):
@PETSc.Log.EventDecorator("PyOP2 Cache Wrapper")
@wraps(func)
def wrapper(*args, **kwargs):
""" Extract the key and then try the memory cache before falling back
on calling the function and populating the cache. SPMD strict ensures
that all ranks cache hit or miss to ensure that the function evaluation
always occurs in parallel.
"""
k = hashkey(*args, **kwargs)
key = _as_hexdigest(*k), func.__qualname__
# Create a PyOP2 comm associated with the key, so it is decrefed when the wrapper exits
with temp_internal_comm(comm_fetcher(*args, **kwargs)) as comm:
# Fetch the per-comm cache_collection or set it up if not present
# A collection is required since different types of cache can be set up on the same comm
cache_collection = comm.Get_attr(comm_cache_keyval)
if cache_collection is None:
cache_collection = {}
comm.Set_attr(comm_cache_keyval, cache_collection)
# If this kind of cache is already present on the
# cache_collection, get it, otherwise create it
local_cache = cache_collection.setdefault(
(cf := cache_factory()).__class__.__name__,
cf
)
local_cache = cache_collection[cf.__class__.__name__]

# If this is a new cache or function add it to the list of known caches
if (comm, comm.name, func, local_cache) not in [(c.comm, c.comm_name, c.func, c.cache()) for c in _KNOWN_CACHES]:
# When a comm is freed we do not hold a reference to the cache.
# We attach a finalizer that extracts the stats before the cache
# is deleted.
_KNOWN_CACHES.append(_CacheRecord(next(_CACHE_CIDX), comm, func, local_cache))

# Grab value from all ranks cache and broadcast cache hit/miss
value = local_cache.get(key, CACHE_MISS)
debug_string = f"{COMM_WORLD.name} R{COMM_WORLD.rank}, {comm.name} R{comm.rank}: "
debug_string += f"key={k} in cache: {local_cache.__class__.__name__} cache "
if value is CACHE_MISS:
debug(debug_string + "miss")
cache_hit = False
else:
debug(debug_string + "hit")
cache_hit = True
all_present = comm.allgather(cache_hit)

# If not present in the cache of all ranks we force re-evaluation on all ranks
if not min(all_present):
value = CACHE_MISS
def parallel_cache(
hashkey=default_parallel_hashkey,
comm_getter=default_comm_getter,
cache_factory=lambda: DEFAULT_CACHE(),
bcast=False,
):
"""Parallel cache decorator.

Parameters
----------
hashkey :
Callable taking ``*args`` and ``**kwargs`` and returning a hash.
comm_getter :
Callable taking ``*args`` and ``**kwargs`` and returning the
appropriate communicator.
cache_factory :
Callable that will build a new cache (if one does not exist).
bcast :
If `True`, then generate the new cache value on one rank and broadcast
to the others. If `False` then values are generated on all ranks.
This option can only be `True` if the operation can be executed in
serial; else it will deadlock.

"""
def decorator(func):
@PETSc.Log.EventDecorator("pyop2: cache wrapper")
@wraps(func)
def wrapper(*args, **kwargs):
# Extract the key and then try the memory cache before falling back
# to calling the function and populating the cache.
k = hashkey(*args, **kwargs)
key = _as_hexdigest(*k), func.__qualname__

# Create a PyOP2 comm associated with the key, so it is decrefed when the wrapper exits
with temp_internal_comm(comm_getter(*args, **kwargs)) as comm:
if configuration["spmd_strict"] and not pytools.is_single_valued(comm.allgather(key)):
raise ValueError("Cache keys differ between ranks")

# Fetch the per-comm cache_collection or set it up if not present
# A collection is required since different types of cache can be set up on the same comm
cache_collection = comm.Get_attr(comm_cache_keyval)
if cache_collection is None:
cache_collection = {}
comm.Set_attr(comm_cache_keyval, cache_collection)
# If this kind of cache is already present on the
# cache_collection, get it, otherwise create it
local_cache = cache_collection.setdefault(
(cf := cache_factory()).__class__.__name__,
cf
)
local_cache = cache_collection[cf.__class__.__name__]

# If this is a new cache or function add it to the list of known caches
if (comm, comm.name, func, local_cache) not in [(c.comm, c.comm_name, c.func, c.cache()) for c in _KNOWN_CACHES]:
# When a comm is freed we do not hold a reference to the cache.
# We attach a finalizer that extracts the stats before the cache
# is deleted.
_KNOWN_CACHES.append(_CacheRecord(next(_CACHE_CIDX), comm, func, local_cache))

# Grab value from all ranks cache and broadcast cache hit/miss
value = local_cache.get(key, CACHE_MISS)
debug_string = f"{COMM_WORLD.name} R{COMM_WORLD.rank}, {comm.name} R{comm.rank}: "
debug_string += f"key={k} in cache: {local_cache.__class__.__name__} cache "
if value is CACHE_MISS:
debug(debug_string + "miss")
cache_hit = False
else:
debug(debug_string + "hit")
cache_hit = True

if configuration["spmd_strict"] and not pytools.is_single_valued(comm.allgather(cache_hit)):
raise ValueError("Cache hit on some ranks but missed on others")

if value is CACHE_MISS:
if bcast:
value = func(*args, **kwargs) if comm.rank == 0 else None
value = comm.bcast(value, root=0)
else:
value = func(*args, **kwargs)
return local_cache.setdefault(key, value)

return wrapper
return decorator
else:
def parallel_cache(
hashkey=default_parallel_hashkey,
comm_fetcher=default_comm_fetcher,
cache_factory=lambda: DEFAULT_CACHE(),
):
"""Parallel cache decorator.
"""
def decorator(func):
@PETSc.Log.EventDecorator("PyOP2 Cache Wrapper")
@wraps(func)
def wrapper(*args, **kwargs):
""" Extract the key and then try the memory cache before falling back
on calling the function and populating the cache.
"""
k = hashkey(*args, **kwargs)
key = _as_hexdigest(*k), func.__qualname__
# Create a PyOP2 comm associated with the key, so it is decrefed when the wrapper exits
with temp_internal_comm(comm_fetcher(*args, **kwargs)) as comm:
# Fetch the per-comm cache_collection or set it up if not present
# A collection is required since different types of cache can be set up on the same comm
cache_collection = comm.Get_attr(comm_cache_keyval)
if cache_collection is None:
cache_collection = {}
comm.Set_attr(comm_cache_keyval, cache_collection)
# If this kind of cache is already present on the
# cache_collection, get it, otherwise create it
local_cache = cache_collection.setdefault(
(cf := cache_factory()).__class__.__name__,
cf
)
local_cache = cache_collection[cf.__class__.__name__]

# If this is a new cache or function add it to the list of known caches
if (comm, comm.name, func, local_cache) not in [(c.comm, c.comm_name, c.func, c.cache()) for c in _KNOWN_CACHES]:
# When a comm is freed we do not hold a reference to the cache.
# We attach a finalizer that extracts the stats before the cache
# is deleted.
_KNOWN_CACHES.append(_CacheRecord(next(_CACHE_CIDX), comm, func, local_cache))

value = local_cache.get(key, CACHE_MISS)

if value is CACHE_MISS:
with PETSc.Log.Event("pyop2: handle cache miss"):
value = func(*args, **kwargs)
return local_cache.setdefault(key, value)

return wrapper
return decorator
return local_cache.setdefault(key, value)
return wrapper
return decorator
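
For concreteness, a hedged usage sketch of the refactored decorator; `expensive_compile`, its arguments and the helper it calls are hypothetical, only the decorator parameters come from the definition above:

    @parallel_cache(
        hashkey=lambda source, comm: (source,),   # must be identical on every rank
        comm_getter=lambda source, comm: comm,    # comm on which the cache lives
        bcast=True,  # evaluate on rank 0 only, then broadcast the value
    )
    def expensive_compile(source, comm):
        # Safe with bcast=True only because nothing in here is collective.
        return compile_to_object_code(source)  # hypothetical helper

With `bcast=False` (the default) every rank calls the function itself, and with PYOP2_SPMD_STRICT=1 the wrapper additionally allgathers the key and the hit/miss flag to catch ranks that diverge.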


def clear_memory_cache(comm):