From 15eee4940318cfbd2baccc5a6a2eba9d4aab67f4 Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Fri, 17 Jun 2022 15:10:34 +0300 Subject: [PATCH 1/4] add recursive clustering and skeletonization --- doc/conf.py | 4 + doc/linalg.rst | 22 +- pytential/linalg/__init__.py | 16 - pytential/linalg/cluster.py | 489 +++++++++++++++++++++ pytential/linalg/direct_solver_symbolic.py | 13 +- pytential/linalg/proxy.py | 255 ++++------- pytential/linalg/skeletonization.py | 282 +++++++++--- pytential/linalg/utils.py | 53 ++- pytential/symbolic/matrix.py | 5 +- test/extra_matrix_data.py | 10 +- test/test_linalg_cluster.py | 129 ++++++ test/test_linalg_proxy.py | 14 +- test/test_linalg_skeletonization.py | 110 ++--- test/test_matrix.py | 8 +- 14 files changed, 1081 insertions(+), 329 deletions(-) create mode 100644 pytential/linalg/cluster.py create mode 100644 test/test_linalg_cluster.py diff --git a/doc/conf.py b/doc/conf.py index 31bc6933f..9d69973a7 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -17,6 +17,10 @@ "DOFDescriptorLike": "pytential.symbolic.dof_desc.DOFDescriptorLike", } +nitpick_ignore_regex = [ + ["py:class", r".*_ProxyNeighborEvaluationResult"], + ] + intersphinx_mapping = { "arraycontext": ("https://documen.tician.de/arraycontext", None), "boxtree": ("https://documen.tician.de/boxtree", None), diff --git a/doc/linalg.rst b/doc/linalg.rst index 97497434f..d3d4870ce 100644 --- a/doc/linalg.rst +++ b/doc/linalg.rst @@ -8,7 +8,7 @@ scheme is used: component of the Stokeslet. * ``cluster`` refers to a piece of a ``block`` as used by the recursive proxy-based skeletonization of the direct solver algorithms. Clusters - are represented by a :class:`~pytential.linalg.TargetAndSourceClusterList`. + are represented by a :class:`~pytential.linalg.utils.TargetAndSourceClusterList`. GMRES ----- @@ -20,17 +20,31 @@ GMRES Hierarchical Direct Solver -------------------------- +.. note:: + + High-level API for direct solvers is in progress. + +Low-level Functionality +----------------------- + .. warning:: All the classes and routines in this module are experimental and the API can change at any point. +.. automodule:: pytential.linalg.skeletonization +.. automodule:: pytential.linalg.cluster .. automodule:: pytential.linalg.proxy -.. automodule:: pytential.linalg.utils -Internal Functionality ----------------------- +Internal Functionality and Utilities +------------------------------------ + +.. warning:: + All the classes and routines in this module are experimental and the + API can change at any point. + +.. automodule:: pytential.linalg.utils .. automodule:: pytential.linalg.direct_solver_symbolic .. 
vim: sw=4:tw=75:fdm=marker diff --git a/pytential/linalg/__init__.py b/pytential/linalg/__init__.py index bfbd4d82e..eed539cc8 100644 --- a/pytential/linalg/__init__.py +++ b/pytential/linalg/__init__.py @@ -25,25 +25,9 @@ make_index_list, make_index_cluster_cartesian_product, interp_decomp, ) -from pytential.linalg.proxy import ( - ProxyClusterGeometryData, ProxyPointTarget, ProxyPointSource, - ProxyGeneratorBase, ProxyGenerator, QBXProxyGenerator, - partition_by_nodes, gather_cluster_neighbor_points, - ) -from pytential.linalg.skeletonization import ( - SkeletonizationWrangler, make_skeletonization_wrangler, - SkeletonizationResult, skeletonize_by_proxy, - ) __all__ = ( "IndexList", "TargetAndSourceClusterList", "make_index_list", "make_index_cluster_cartesian_product", "interp_decomp", - - "ProxyClusterGeometryData", "ProxyPointTarget", "ProxyPointSource", - "ProxyGeneratorBase", "ProxyGenerator", "QBXProxyGenerator", - "partition_by_nodes", "gather_cluster_neighbor_points", - - "SkeletonizationWrangler", "make_skeletonization_wrangler", - "SkeletonizationResult", "skeletonize_by_proxy", ) diff --git a/pytential/linalg/cluster.py b/pytential/linalg/cluster.py new file mode 100644 index 000000000..7a90e2cac --- /dev/null +++ b/pytential/linalg/cluster.py @@ -0,0 +1,489 @@ +from __future__ import annotations + + +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +""" + +import pathlib +from collections.abc import Iterator +from dataclasses import dataclass, replace +from functools import singledispatch +from typing import Any + +import numpy as np + +from pytools import memoize_method +from pytools.obj_array import make_obj_array + +from arraycontext import PyOpenCLArrayContext +from boxtree.tree import Tree +from meshmode.discretization import Discretization +from pytential import sym, GeometryCollection +from pytential.linalg.proxy import ProxyGenerator +from pytential.linalg.utils import IndexList, TargetAndSourceClusterList +from pytential.qbx import QBXLayerPotentialSource + +__doc__ = """ +Clustering +~~~~~~~~~~ + +.. autoclass:: ClusterLevel +.. autoclass:: ClusterTree + +.. autofunction:: split_array +.. autofunction:: cluster +.. autofunction:: uncluster + +.. 
autofunction:: partition_by_nodes +""" + +# FIXME: this is just an arbitrary value +_DEFAULT_MAX_PARTICLES_IN_BOX = 32 + + +# {{{ cluster tree + + +def make_cluster_parent_map(parent_ids: np.ndarray) -> np.ndarray: + """Construct a parent map for :attr:`ClusterLevel.parent_map`.""" + # NOTE: np.unique returns a sorted array + unique_parent_ids = np.unique(parent_ids) + ids = np.arange(parent_ids.size) + + return make_obj_array([ + ids[parent_ids == unique_parent_ids[i]] + for i in range(unique_parent_ids.size) + ]) + + +@dataclass(frozen=True) +class ClusterLevel: + """A level in a :class:`ClusterTree`. + + .. attribute:: level + + Current level that is represented. + + .. attribute:: nclusters + + Number of clusters on the current level (same as number of boxes + in :attr:`box_ids`). + + .. attribute:: box_ids + + Box IDs on the current level. + + .. attribute:: parent_map + + An object :class:`~numpy.ndarray` containing buckets of child indices, + i.e. ``parent_map[i]`` contains all the child indices that will cluster + into the same parent. Note that this indexing is local to this level + and is not related to the tree indexing stored by the :class:`ClusterTree`. + """ + + level: int + box_ids: np.ndarray + parent_map: np.ndarray + + @property + def nclusters(self): + return self.box_ids.size + + +@dataclass(frozen=True) +class ClusterTree: + r"""Hierarchical cluster representation. + + .. attribute:: nlevels + + Total number of levels in the tree. + + .. attribute:: leaf_cluster_box_ids + + Box IDs for each cluster on the leaf level of the tree. + + .. attribute:: tree_cluster_parent_ids + + Parent box IDs for :attr:`leaf_cluster_box_ids`. + + .. attribute:: levels + + An :class:`~numpy.ndarray` of :class:`ClusterLevel`\ s. + """ + + nlevels: int + leaf_cluster_box_ids: np.ndarray + tree_cluster_parent_ids: np.ndarray + + # NOTE: only here to allow easier debugging + testing + _tree: Tree | None + + @property + def nclusters(self): + return self.leaf_cluster_box_ids.size + + @property + @memoize_method + def levels(self) -> np.ndarray: + return make_obj_array(list(self.iter_levels())) + + def iter_levels(self) -> Iterator[ClusterLevel]: + """ + :returns: an iterator over all the :class:`ClusterLevel` levels. + """ + + box_ids = self.leaf_cluster_box_ids + parent_ids = self.tree_cluster_parent_ids[box_ids] + clevel = ClusterLevel( + level=self.nlevels - 1, + box_ids=box_ids, + parent_map=make_cluster_parent_map(parent_ids), + ) + + for _ in range(self.nlevels - 1, -1, -1): + yield clevel + + box_ids = np.unique(self.tree_cluster_parent_ids[clevel.box_ids]) + parent_ids = self.tree_cluster_parent_ids[box_ids] + clevel = ClusterLevel( + level=clevel.level - 1, + box_ids=box_ids, + parent_map=make_cluster_parent_map(parent_ids) + ) + + assert clevel.nclusters == 1 + +# }}} + + +# {{{ cluster + +def split_array(x: np.ndarray, index: IndexList) -> np.ndarray: + """ + :returns: an object :class:`~numpy.ndarray` where each entry contains the + elements of the :math:`i`-th cluster in *index*. + """ + assert x.size == index.nindices + + from pytools.obj_array import make_obj_array + return make_obj_array([ + index.cluster_take(x, i) for i in range(index.nclusters) + ]) + + +@singledispatch +def cluster(obj: Any, clevel: ClusterLevel) -> Any: + """Merge together elements of *obj* into their parent object, as described + by :attr:`ClusterLevel.parent_map`. 
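+
+    For object arrays, the entries in each parent bucket are either
+    concatenated (vectors) or assembled into a block-diagonal matrix
+    (matrices). A minimal sketch (``x`` is an object array produced by
+    :func:`split_array` and ``clevel`` is one level of a :class:`ClusterTree`):
+
+    .. code-block:: python
+
+        y = cluster(x, clevel)
+        # one entry per parent in the next coarser level
+        assert y.shape == (len(clevel.parent_map),)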
+ """ + raise NotImplementedError(type(obj).__name__) + + +@cluster.register(IndexList) +def _cluster_index_list(obj: IndexList, clevel: ClusterLevel) -> IndexList: + assert obj.nclusters == clevel.nclusters + + if clevel.nclusters == 1: + return obj + + from pytential.linalg.utils import make_index_list + indices = make_obj_array([ + np.concatenate([obj.cluster_indices(i) for i in ppm]) + for ppm in clevel.parent_map + ]) + + return make_index_list(indices) + + +@cluster.register(TargetAndSourceClusterList) +def _cluster_target_and_source_cluster_list( + obj: TargetAndSourceClusterList, clevel: ClusterLevel, + ) -> TargetAndSourceClusterList: + assert obj.nclusters == clevel.nclusters + + if clevel.nclusters == 1: + return obj + + return replace(obj, + targets=cluster(obj.targets, clevel), + sources=cluster(obj.sources, clevel)) + + +@cluster.register(np.ndarray) +def _cluster_ndarray(obj: np.ndarray, clevel: ClusterLevel) -> np.ndarray: + assert obj.shape == (clevel.nclusters,) + if clevel.nclusters == 1: + return obj + + def make_block(i: int, j: int): + if i == j: + return obj[i] + + return np.zeros((obj[i].shape[0], obj[j].shape[1]), dtype=obj[i].dtype) + + from pytools import single_valued + ndim = single_valued(block.ndim for block in obj) + + if ndim == 1: + return make_obj_array([ + np.concatenate([obj[i] for i in ppm]) for ppm in clevel.parent_map + ]) + elif ndim == 2: + return make_obj_array([ + np.block([[make_block(i, j) for j in ppm] for i in ppm]) + for ppm in clevel.parent_map + ]) + else: + raise ValueError(f"unsupported ndarray dimension: '{ndim}'") + +# }}} + + +# {{{ uncluster + +def uncluster(ary: np.ndarray, index: IndexList, clevel: ClusterLevel) -> np.ndarray: + """Performs the reverse of :func:`cluster` on object arrays. + + :arg ary: an object :class:`~numpy.ndarray` with a shape that matches + :attr:`ClusterLevel.parent_map`. + :arg index: an :class:`~pytential.linalg.utils.IndexList` for the + current level, as given by :attr:`ClusterLevel.box_ids`. + :returns: an object :class:`~numpy.ndarray` with a shape that matches + :attr:`ClusterLevel.box_ids` of all the elements of *ary* that belong + to each child cluster. 
+ """ + assert ary.dtype.char == "O" + assert ary.shape == (clevel.parent_map.size,) + + if index.nclusters == 1: + return ary + + result: np.ndarray = np.empty(index.nclusters, dtype=object) + for ifrom, ppm in enumerate(clevel.parent_map): + offset = 0 + for ito in ppm: + cluster_size = index.cluster_size(ito) + result[ito] = ary[ifrom][offset:offset + cluster_size] + offset += cluster_size + + assert ary[ifrom].shape == (offset,) + + return result + +# }}} + + +# {{{ cluster generation + +def _build_binary_ish_tree_from_starts(starts: np.ndarray) -> ClusterTree: + partition_box_ids: np.ndarray[tuple[int, ...], Any] = np.arange(starts.size - 1) + + box_ids = partition_box_ids + + box_parent_ids = [] + offset = box_ids.size + while box_ids.size > 1: + # NOTE: this is probably not the most efficient way to do it, but this + # code is mostly meant for debugging using a simple tree + clusters = np.array_split(box_ids, box_ids.size // 2) + parent_ids = offset + np.arange(len(clusters)) + box_parent_ids.append(np.repeat(parent_ids, [len(c) for c in clusters])) + + box_ids = parent_ids + offset += box_ids.size + + # NOTE: make the root point to itself + box_parent_ids.append(np.array([offset - 1])) + nlevels = len(box_parent_ids) + + return ClusterTree( + nlevels=nlevels, + leaf_cluster_box_ids=partition_box_ids, + tree_cluster_parent_ids=np.concatenate(box_parent_ids), + _tree=None) + + +def partition_by_nodes( + actx: PyOpenCLArrayContext, places: GeometryCollection, *, + dofdesc: sym.DOFDescriptorLike | None = None, + tree_kind: str | None = "adaptive-level-restricted", + max_particles_in_box: int | None = None) -> tuple[IndexList, ClusterTree]: + """Generate equally sized ranges of nodes. The partition is created at the + lowest level of granularity, i.e. nodes. This results in balanced ranges + of points, but will split elements across different ranges. + + :arg dofdesc: a :class:`~pytential.symbolic.dof_desc.DOFDescriptor` for + the geometry in *places* which should be partitioned. + :arg tree_kind: if not *None*, it is passed to :class:`boxtree.TreeBuilder`. + :arg max_particles_in_box: value used to control the number of points + in each partition (and thus the number of partitions). See the documentation + in :class:`boxtree.TreeBuilder`. + """ + if dofdesc is None: + dofdesc = places.auto_source + dofdesc = sym.as_dofdesc(dofdesc) + + if max_particles_in_box is None: + max_particles_in_box = _DEFAULT_MAX_PARTICLES_IN_BOX + + lpot_source = places.get_geometry(dofdesc.geometry) + assert isinstance(lpot_source, Discretization | QBXLayerPotentialSource) + + discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) + assert isinstance(discr, Discretization) + + if tree_kind is not None: + setup_actx = lpot_source._setup_actx + assert isinstance(setup_actx, PyOpenCLArrayContext) + + from pytential.qbx.utils import tree_code_container + tcc = tree_code_container(setup_actx) + + from arraycontext import flatten + from meshmode.dof_array import DOFArray + tree, _ = tcc.build_tree()(actx.queue, + particles=flatten( + actx.thaw(discr.nodes()), actx, leaf_class=DOFArray + ), + max_particles_in_box=max_particles_in_box, + kind=tree_kind) + + from boxtree import box_flags_enum + tree = tree.get(actx.queue) + # FIXME maybe this should use IS_LEAF once available? 
+ leaf_boxes, = ( + tree.box_flags & box_flags_enum.HAS_SOURCE_OR_TARGET_CHILD_BOXES == 0 + ).nonzero() + + # FIXME: this annotation is not needed with numpy 2.0 + indices: np.ndarray = np.empty(len(leaf_boxes), dtype=object) + starts = None + + for i, ibox in enumerate(leaf_boxes): + box_start = tree.box_source_starts[ibox] + box_end = box_start + tree.box_source_counts_cumul[ibox] + indices[i] = tree.user_source_ids[box_start:box_end] + + ctree = ClusterTree( + nlevels=tree.nlevels, + leaf_cluster_box_ids=leaf_boxes, + tree_cluster_parent_ids=tree.box_parent_ids, + _tree=tree) + else: + if discr.ambient_dim != 2 and discr.dim == 1: + raise ValueError("only curves are supported for 'tree_kind=None'") + + nclusters = max(discr.ndofs // max_particles_in_box, 2) + indices = np.arange(0, discr.ndofs, dtype=np.int64) + starts = np.linspace(0, discr.ndofs, nclusters + 1, dtype=np.int64) + + # FIXME: mypy seems to be able to figure this out with numpy 2.0 + assert starts is not None + assert starts[-1] == discr.ndofs + + ctree = _build_binary_ish_tree_from_starts(starts) + + from pytential.linalg import make_index_list + return make_index_list(indices, starts=starts), ctree + +# }}} + + +# {{{ visualize clusters + +def visualize_clusters(actx: PyOpenCLArrayContext, + generator: ProxyGenerator, + srcindex: IndexList, + tree: ClusterTree, + filename: str | pathlib.Path, *, + dofdesc: sym.DOFDescriptorLike = None, + overwrite: bool = False) -> None: + filename = pathlib.Path(filename) + + places = generator.places + if dofdesc is None: + dofdesc = places.auto_source + dofdesc = sym.as_dofdesc(dofdesc) + + discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) + assert isinstance(discr, Discretization) + + if discr.ambient_dim != 2: + raise NotImplementedError(f"Unsupported dimension: {discr.ambient_dim}") + + import matplotlib.pyplot as pt + + from arraycontext import flatten + from boxtree.visualization import TreePlotter + from meshmode.dof_array import DOFArray + + x, y = actx.to_numpy(flatten(discr.nodes(), actx, leaf_class=DOFArray)) + for clevel in tree.levels: + outfile = filename.with_stem(f"{filename.stem}-{clevel.level:03d}") + if not overwrite and outfile.exists(): + raise FileExistsError(f"Output file '{outfile}' already exists") + + pxy = generator(actx, dofdesc, srcindex).to_numpy(actx) + pxycenters = pxy.centers + pxyradii = pxy.radii + clsradii = pxy.cluster_radii + + fig = pt.figure() + ax = fig.gca() + + plotter = TreePlotter(tree._tree) + plotter.set_bounding_box() + plotter.draw_tree(fill=False, edgecolor="black", zorder=10) + + ax.plot(x, y, "ko", ms=2.0) + for i in range(srcindex.nclusters): + isrc = srcindex.cluster_indices(i) + ax.plot(x[isrc], y[isrc], "o", ms=2.0) + + from itertools import cycle + colors = cycle(pt.rcParams["axes.prop_cycle"].by_key()["color"]) + + for ppm in clevel.parent_map: + color = next(colors) + for j in ppm: + center = (pxycenters[0, j], pxycenters[1, j]) + c = pt.Circle(center, pxyradii[j], color=color, alpha=0.1) + ax.add_artist(c) + c = pt.Circle(center, clsradii[j], color=color, alpha=0.1) + ax.add_artist(c) + ax.text(*center, f"{j}", fontsize=18) + + ax.set_xlabel("$x$") + ax.set_ylabel("$y$") + ax.relim() + ax.autoscale() + ax.set_aspect("equal") + + fig.savefig(outfile) + pt.close(fig) + + srcindex = cluster(srcindex, clevel) + +# }}} + + +# vim: foldmethod=marker diff --git a/pytential/linalg/direct_solver_symbolic.py b/pytential/linalg/direct_solver_symbolic.py index fc6322f5f..118b94597 100644 --- 
a/pytential/linalg/direct_solver_symbolic.py +++ b/pytential/linalg/direct_solver_symbolic.py @@ -20,6 +20,8 @@ THE SOFTWARE. """ +import numpy as np + from pytools.obj_array import make_obj_array from pytential.symbolic.mappers import ( @@ -44,9 +46,12 @@ class PROXY_SKELETONIZATION_TARGET: # noqa: N801 def prepare_expr(places, exprs, auto_where=None): from pytential.symbolic.execution import _prepare_expr - return make_obj_array([ - _prepare_expr(places, expr, auto_where=auto_where) - for expr in exprs]) + if isinstance(exprs, np.ndarray): + return make_obj_array([ + _prepare_expr(places, expr, auto_where=auto_where) + for expr in exprs]) + + return _prepare_expr(places, exprs, auto_where=auto_where) def prepare_proxy_expr(places, exprs, auto_where=None): @@ -224,3 +229,5 @@ def __init__(self, source, target): self.operand_rec = _LocationReplacer(source, default_source=source) # }}} + +# vim: foldmethod=marker diff --git a/pytential/linalg/proxy.py b/pytential/linalg/proxy.py index 23555cccb..16708b4fb 100644 --- a/pytential/linalg/proxy.py +++ b/pytential/linalg/proxy.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2018 Alexandru Fikl" __license__ = """ @@ -27,6 +30,7 @@ import numpy as np import numpy.linalg as la +import arraycontext from arraycontext import PyOpenCLArrayContext, flatten from meshmode.discretization import Discretization from meshmode.dof_array import DOFArray @@ -42,13 +46,14 @@ import loopy as lp from loopy.version import MOST_RECENT_LANGUAGE_VERSION +import logging +logger = logging.getLogger(__name__) + __doc__ = """ Proxy Point Generation ~~~~~~~~~~~~~~~~~~~~~~ -.. currentmodule:: pytential.linalg - .. autoclass:: ProxyPointSource .. autoclass:: ProxyPointTarget .. autoclass:: ProxyClusterGeometryData @@ -57,7 +62,6 @@ .. autoclass:: ProxyGenerator .. autoclass:: QBXProxyGenerator -.. autofunction:: partition_by_nodes .. autofunction:: gather_cluster_neighbor_points """ @@ -65,83 +69,6 @@ _DEFAULT_MAX_PARTICLES_IN_BOX = 32 -# {{{ point index partitioning - -def partition_by_nodes( - actx: PyOpenCLArrayContext, places: GeometryCollection, *, - dofdesc: DOFDescriptorLike | None = None, - tree_kind: str | None = "adaptive-level-restricted", - max_particles_in_box: int | None = None) -> IndexList: - """Generate equally sized ranges of nodes. The partition is created at the - lowest level of granularity, i.e. nodes. This results in balanced ranges - of points, but will split elements across different ranges. - - :arg dofdesc: a :class:`~pytential.symbolic.dof_desc.DOFDescriptor` for - the geometry in *places* which should be partitioned. - :arg tree_kind: if not *None*, it is passed to :class:`boxtree.TreeBuilder`. - :arg max_particles_in_box: value used to control the number of points - in each partition (and thus the number of partitions). See the documentation - in :class:`boxtree.TreeBuilder`. 
- """ - if dofdesc is None: - dofdesc = places.auto_source - dofdesc = sym.as_dofdesc(dofdesc) - - if max_particles_in_box is None: - max_particles_in_box = _DEFAULT_MAX_PARTICLES_IN_BOX - - from pytential.source import LayerPotentialSourceBase - - lpot_source = places.get_geometry(dofdesc.geometry) - assert isinstance(lpot_source, LayerPotentialSourceBase) - - discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) - assert isinstance(discr, Discretization) - - if tree_kind is not None: - from pytential.qbx.utils import tree_code_container - tcc = tree_code_container(lpot_source._setup_actx) - - tree, _ = tcc.build_tree()(actx.queue, - particles=flatten( - actx.thaw(discr.nodes()), actx, leaf_class=DOFArray - ), - max_particles_in_box=max_particles_in_box, - kind=tree_kind) - - from boxtree import box_flags_enum - tree = tree.get(actx.queue) - # FIXME maybe this should use IS_LEAF once available? - leaf_boxes, = ( - tree.box_flags & box_flags_enum.HAS_SOURCE_OR_TARGET_CHILD_BOXES == 0 - ).nonzero() - - indices: np.ndarray = np.empty(len(leaf_boxes), dtype=object) - starts: np.ndarray | None = None - - for i, ibox in enumerate(leaf_boxes): - box_start = tree.box_source_starts[ibox] - box_end = box_start + tree.box_source_counts_cumul[ibox] - indices[i] = tree.user_source_ids[box_start:box_end] - else: - if discr.ambient_dim != 2 and discr.dim == 1: - raise ValueError("only curves are supported for 'tree_kind=None'") - - nclusters = max(discr.ndofs // max_particles_in_box, 2) - indices = np.arange(0, discr.ndofs, dtype=np.int64) - starts = np.linspace(0, discr.ndofs, nclusters + 1, dtype=np.int64) - - # FIXME: I'm not sure why mypy can't figure this out. - assert starts is not None - - assert starts[-1] == discr.ndofs - - from pytential.linalg import make_index_list - return make_index_list(indices, starts=starts) - -# }}} - - # {{{ proxy points class ProxyPointSource(PointPotentialSource): @@ -189,31 +116,17 @@ def __init__(self, @dataclass(frozen=True) class ProxyClusterGeometryData: - """ - .. attribute:: srcindex - - A :class:`~pytential.linalg.IndexList` describing which cluster - of points each proxy ball was created from. - - .. attribute:: pxyindex - - A :class:`~pytential.linalg.IndexList` describing which proxies - belong to which cluster. - - .. attribute:: points - - A concatenated array of all the proxy points. Can be sliced into - using :attr:`pxyindex` (shape ``(dim, nproxies)``). - - .. attribute:: centers - - An array of all the proxy ball centers (shape ``(dim, nclusters)``). - - .. attribute:: radii - - An array of all the proxy ball radii (shape ``(nclusters,)``). - - .. attribute:: nclusters + """The proxy information generated by :class:`ProxyGeneratorBase`. + + .. autoattribute:: nclusters + .. autoattribute:: places + .. autoattribute:: dofdesc + .. autoattribute:: srcindex + .. autoattribute:: pxyindex + .. autoattribute:: points + .. autoattribute:: centers + .. autoattribute:: radii + .. autoattribute:: cluster_radii .. automethod:: __init__ .. automethod:: to_numpy @@ -222,39 +135,55 @@ class ProxyClusterGeometryData: """ places: GeometryCollection + """Geometry collection containing the used :attr:`dofdesc`.""" dofdesc: sym.DOFDescriptor + """A descriptor for the geometry used to compute the proxy points.""" srcindex: IndexList + """A :class:`~pytential.linalg.utils.IndexList` describing which cluster + of points each proxy ball was created from. 
+ """ pxyindex: IndexList + """A :class:`~pytential.linalg.utils.IndexList` describing which proxies + belong to which cluster. + """ points: np.ndarray + """A concatenated array of all the proxy points. Can be sliced into + using :attr:`pxyindex` (shape ``(dim, nproxies)``). + """ centers: np.ndarray + """An array of all the proxy ball centers (shape ``(dim, nclusters)``).""" radii: np.ndarray + """An array of all the proxy ball radii (shape ``(nclusters,)``).""" - _cluster_radii: np.ndarray | None = None + cluster_radii: np.ndarray + """An array of all the cluster ball radii (shape ``(nclusters,)``). The + proxy radii are essentially just the cluster radii multiplied by the + ``radius_factor``. + """ @property def nclusters(self) -> int: + """Number of clusters.""" return self.srcindex.nclusters @property def discr(self): + """The discretization that corresponds to :attr:`dofdesc` in :attr:`places`.""" return self.places.get_discretization( self.dofdesc.geometry, self.dofdesc.discr_stage) - def to_numpy(self, actx: PyOpenCLArrayContext) -> "ProxyClusterGeometryData": - if self._cluster_radii is not None: - cluster_radii = actx.to_numpy(self._cluster_radii) - else: - cluster_radii = None - + def to_numpy(self, actx: PyOpenCLArrayContext) -> ProxyClusterGeometryData: + """Convert the proxy geometry data to :mod:`numpy` arrays.""" from dataclasses import replace return replace(self, points=np.stack(actx.to_numpy(self.points)), centers=np.stack(actx.to_numpy(self.centers)), radii=actx.to_numpy(self.radii), - _cluster_radii=cluster_radii) + cluster_radii=actx.to_numpy(self.cluster_radii), + ) def as_sources(self) -> ProxyPointSource: lpot_source = self.places.get_geometry(self.dofdesc.geometry) @@ -422,7 +351,7 @@ def get_radii_kernel_ex(self, actx: PyOpenCLArrayContext) -> lp.ExecutorBase: raise NotImplementedError def __call__(self, - actx: PyOpenCLArrayContext, + actx: arraycontext.PyOpenCLArrayContext, source_dd: DOFDescriptorLike | None, dof_index: IndexList, **kwargs: Any) -> ProxyClusterGeometryData: @@ -441,8 +370,6 @@ def __call__(self, source_dd.geometry, source_dd.discr_stage) assert isinstance(discr, Discretization) - include_cluster_radii = kwargs.pop("include_cluster_radii", False) - # {{{ get proxy centers and radii sources = flatten(discr.nodes(), actx, leaf_class=DOFArray) @@ -454,25 +381,13 @@ def __call__(self, srcstarts=dof_index.starts) knl = self.get_radii_kernel_ex(actx) - _, (radii_dev,) = knl(actx.queue, + _, (cluster_radii,) = knl(actx.queue, sources=sources, srcindices=dof_index.indices, srcstarts=dof_index.starts, - radius_factor=self.radius_factor, proxy_centers=centers_dev, **kwargs) - - if include_cluster_radii: - knl = make_compute_cluster_radii_kernel_ex(actx, self.ambient_dim) - _, (cluster_radii,) = knl(actx.queue, - sources=sources, - srcindices=dof_index.indices, - srcstarts=dof_index.starts, - radius_factor=1.0, - proxy_centers=centers_dev) - cluster_radii = actx.freeze(cluster_radii) - else: - cluster_radii = None + radii_dev = self.radius_factor * cluster_radii # }}} @@ -505,7 +420,7 @@ def __call__(self, points=actx.freeze(actx.from_numpy(proxies)), centers=actx.freeze(centers_dev), radii=actx.freeze(radii_dev), - _cluster_radii=cluster_radii + cluster_radii=actx.freeze(cluster_radii), ) @@ -527,13 +442,12 @@ def prg(): - sources[idim, srcindices[i + ioffset]]) ** 2) )) - proxy_radius[icluster] = radius_factor * cluster_radius + proxy_radius[icluster] = cluster_radius end """, [ lp.GlobalArg("sources", None, shape=(ndim, "nsources"), dim_tags="sep,C", 
offset=lp.auto), lp.ValueArg("nsources", np.int64), - lp.ValueArg("radius_factor", np.float64), ... ], name="compute_cluster_radii_knl", @@ -587,7 +501,7 @@ def prg(): <> rqbx = rqbx_int if rqbx_ext < rqbx_int else rqbx_ext - proxy_radius[icluster] = radius_factor * rqbx + proxy_radius[icluster] = rqbx end """, [ lp.GlobalArg("sources", None, @@ -597,7 +511,6 @@ def prg(): lp.GlobalArg("center_ext", None, shape=(ndim, "nsources"), dim_tags="sep,C", offset=lp.auto), lp.ValueArg("nsources", np.int64), - lp.ValueArg("radius_factor", np.float64), ... ], name="compute_cluster_qbx_radii_knl", @@ -651,24 +564,40 @@ def __call__(self, # {{{ gather_cluster_neighbor_points def gather_cluster_neighbor_points( - actx: PyOpenCLArrayContext, pxy: ProxyClusterGeometryData, *, + actx: PyOpenCLArrayContext, + pxy: ProxyClusterGeometryData, + tgtindex: IndexList | None = None, + *, max_particles_in_box: int | None = None) -> IndexList: - """Generate a set of neighboring points for each cluster of points in - *pxy*. Neighboring points of a cluster :math:`i` are defined - as all the points inside the proxy ball :math:`i` that do not also - belong to the cluster itself. + r"""Generate a set of neighboring points for each cluster of points in *pxy*. + + Neighboring points of a cluster :math:`i` are defined as all the points + from *tgtindex* that are inside the proxy ball :math:`i` but outside the + cluster itself. For example, given a cluster with radius :math:`r_s` and + proxy radius :math:`r_p > r_s`, we gather all points such that + :math:`r_s < \|\mathbf{x}\| \le r_p`. """ + srcindex = pxy.srcindex + if tgtindex is None: + tgtindex = srcindex + + nclusters = srcindex.nclusters + if tgtindex.nclusters != nclusters: + raise ValueError("'tgtindex' has a different number of clusters: " + f"'{tgtindex.nclusters}' (expected {nclusters})") + if max_particles_in_box is None: max_particles_in_box = _DEFAULT_MAX_PARTICLES_IN_BOX - from pytential.source import LayerPotentialSourceBase - dofdesc = pxy.dofdesc lpot_source = pxy.places.get_geometry(dofdesc.geometry) - assert isinstance(lpot_source, LayerPotentialSourceBase) - discr = pxy.places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) + + assert ( + dofdesc.discr_stage is None + or isinstance(lpot_source, QBXLayerPotentialSource) + ), (dofdesc, type(lpot_source)) assert isinstance(discr, Discretization) # {{{ get only sources in the current cluster set @@ -696,18 +625,23 @@ def prg() -> lp.ExecutorBase: return knl.executor(actx.context) - _, (sources,) = prg()(actx.queue, + _, (targets,) = prg()(actx.queue, ary=flatten(discr.nodes(), actx, leaf_class=DOFArray), - srcindices=pxy.srcindex.indices) + srcindices=tgtindex.indices) # }}} # {{{ perform area query from pytential.qbx.utils import tree_code_container - tcc = tree_code_container(lpot_source._setup_actx) + # NOTE: use the base source's actx for caching the code -- that has + # the best chance of surviving even when updating the lpot_source + setup_actx = discr._setup_actx + assert isinstance(setup_actx, PyOpenCLArrayContext) + + tcc = tree_code_container(setup_actx) + tree, _ = tcc.build_tree()(actx.queue, targets, max_particles_in_box=max_particles_in_box) query, _ = tcc.build_area_query()(actx.queue, tree, pxy.centers, pxy.radii) @@ -720,10 +654,10 @@ def prg() -> lp.ExecutorBase: pxycenters = actx.to_numpy(pxy.centers) pxyradii = actx.to_numpy(pxy.radii) - srcindex = pxy.srcindex - nbrindices: np.ndarray = np.empty(srcindex.nclusters,
dtype=object) - for icluster in range(srcindex.nclusters): + eps = 100 * np.finfo(pxyradii.dtype).eps + nbrindices: np.ndarray = np.empty(nclusters, dtype=object) + for icluster in range(nclusters): # get list of boxes intersecting the current ball istart = query.leaves_near_ball_starts[icluster] iend = query.leaves_near_ball_starts[icluster + 1] @@ -742,16 +676,17 @@ def prg() -> lp.ExecutorBase: isources = tree.user_source_ids[isources] # get nodes inside the ball but outside the current cluster - # FIXME: this assumes that only the points in `pxy.secindex` should - # count as neighbors, not all the nodes in the discretization. - # FIXME: it also assumes that all the indices are sorted? center = pxycenters[:, icluster].reshape(-1, 1) - radius = pxyradii[icluster] - mask = ((la.norm(nodes - center, axis=0) < radius) - & ((isources < srcindex.starts[icluster]) - | (srcindex.starts[icluster + 1] <= isources))) - - nbrindices[icluster] = srcindex.indices[isources[mask]] + radii = la.norm(nodes - center, axis=0) - eps + mask = ( + (radii <= pxyradii[icluster]) + & ((isources < tgtindex.starts[icluster]) + | (tgtindex.starts[icluster + 1] <= isources))) + + nbrindices[icluster] = tgtindex.indices[isources[mask]] + if nbrindices[icluster].size == 0: + logger.warning("Cluster '%d' has no neighbors. You might need to " + "increase the proxy 'radius_factor'.", icluster) # }}} diff --git a/pytential/linalg/skeletonization.py b/pytential/linalg/skeletonization.py index 365a44aa3..b57db6975 100644 --- a/pytential/linalg/skeletonization.py +++ b/pytential/linalg/skeletonization.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2018-2022 Alexandru Fikl" __license__ = """ @@ -22,7 +25,7 @@ from collections.abc import Callable, Hashable, Sequence from dataclasses import dataclass -from typing import Any +from typing import TYPE_CHECKING, Any import numpy as np @@ -32,22 +35,28 @@ from pytential.symbolic.matrix import ClusterMatrixBuilderBase from pytential.linalg.utils import IndexList, TargetAndSourceClusterList from pytential.linalg.proxy import ProxyGeneratorBase, ProxyClusterGeometryData +from pytential.linalg.cluster import ClusterTree, cluster from pytential.linalg.direct_solver_symbolic import ( PROXY_SKELETONIZATION_TARGET, PROXY_SKELETONIZATION_SOURCE, prepare_expr, prepare_proxy_expr) +if TYPE_CHECKING: + from pytential.linalg.utils import IndexList, TargetAndSourceClusterList -__doc__ = """ -.. currentmodule:: pytential.linalg +import logging +logger = logging.getLogger(__name__) + +__doc__ = """ Skeletonization ---------------- +~~~~~~~~~~~~~~~ .. autoclass:: SkeletonizationWrangler .. autoclass:: make_skeletonization_wrangler .. autoclass:: SkeletonizationResult .. autofunction:: skeletonize_by_proxy +.. autofunction:: rec_skeletonize_by_proxy """ @@ -107,7 +116,7 @@ def _approximate_geometry_waa_magnitude( actx: PyOpenCLArrayContext, places: GeometryCollection, - cluster_index: "IndexList", + cluster_index: IndexList, domain: sym.DOFDescriptor) -> Array: """ :arg cluster_index: a :class:`~pytential.linalg.IndexList` representing the @@ -132,7 +141,9 @@ def prg(): """ <> ioffset = starts[icluster] <> npoints = starts[icluster + 1] - ioffset - result[icluster] = reduce(sum, i, waa[indices[i + ioffset]]) / npoints + result[icluster] = ( + reduce(sum, i, abs(waa[indices[i + ioffset]])) / npoints + if npoints > 0 else 1.0) """, lang_version=lp.MOST_RECENT_LANGUAGE_VERSION, ) @@ -232,15 +243,17 @@ class SkeletonizationWrangler: .. 
attribute:: proxy_source_cluster_builder .. attribute:: proxy_target_cluster_builder - A callable that is used to evaluate farfield proxy interactions. + A subclass of ``pytential.symbolic.matrix.ClusterMatrixBuilderBase`` + that is used to evaluate farfield proxy interactions. This should follow the calling convention of the constructor to - :class:`pytential.symbolic.matrix.P2PClusterMatrixBuilder`. + ``pytential.symbolic.matrix.P2PClusterMatrixBuilder``. .. attribute:: neighbor_cluster_builder - A callable that is used to evaluate nearfield neighbour interactions. + A subclass of ``pytential.symbolic.matrix.ClusterMatrixBuilderBase`` + that is used to evaluate nearfield neighbour interactions. This should follow the calling convention of the constructor to - :class:`pytential.symbolic.matrix.QBXClusterMatrixBuilder`. + ``pytential.symbolic.matrix.QBXClusterMatrixBuilder``. .. automethod:: evaluate_source_proxy_interaction .. automethod:: evaluate_target_proxy_interaction @@ -291,35 +304,53 @@ def _evaluate_expr(self, context=self.context, **kwargs)(expr) + def evaluate_self( + self, + actx: PyOpenCLArrayContext, + places: GeometryCollection, + tgt_src_index: TargetAndSourceClusterList, + ibrow: int, ibcol: int, + ) -> np.ndarray: + cls = self.neighbor_cluster_builder + return self._evaluate_expr( + actx, places, cls, tgt_src_index, self.exprs[ibrow], + idomain=ibcol, _weighted=True) + # {{{ nearfield - def evaluate_source_neighbor_interaction(self, + def evaluate_source_neighbor_interaction( + self, actx: PyOpenCLArrayContext, places: GeometryCollection, pxy: ProxyClusterGeometryData, nbrindex: IndexList, *, - ibrow: int, ibcol: int) -> tuple[np.ndarray, TargetAndSourceClusterList]: + ibrow: int, ibcol: int, + ) -> tuple[np.ndarray, TargetAndSourceClusterList]: + from pytential.linalg.utils import TargetAndSourceClusterList nbr_src_index = TargetAndSourceClusterList(nbrindex, pxy.srcindex) eval_mapper_cls = self.neighbor_cluster_builder expr = self.exprs[ibrow] mat = self._evaluate_expr( actx, places, eval_mapper_cls, nbr_src_index, expr, - idomain=ibcol, _weighted=self.weighted_sources) + idomain=ibcol, _weighted=True) return mat, nbr_src_index - def evaluate_target_neighbor_interaction(self, + def evaluate_target_neighbor_interaction( + self, actx: PyOpenCLArrayContext, places: GeometryCollection, pxy: ProxyClusterGeometryData, nbrindex: IndexList, *, - ibrow: int, ibcol: int) -> tuple[np.ndarray, TargetAndSourceClusterList]: + ibrow: int, ibcol: int, + ) -> tuple[np.ndarray, TargetAndSourceClusterList]: + from pytential.linalg.utils import TargetAndSourceClusterList tgt_nbr_index = TargetAndSourceClusterList(pxy.srcindex, nbrindex) eval_mapper_cls = self.neighbor_cluster_builder expr = self.exprs[ibrow] mat = self._evaluate_expr( actx, places, eval_mapper_cls, tgt_nbr_index, expr, - idomain=ibcol, _weighted=self.weighted_targets) + idomain=ibcol, _weighted=True) return mat, tgt_nbr_index @@ -327,17 +358,25 @@ def evaluate_target_neighbor_interaction(self, # {{{ proxy - def evaluate_source_proxy_interaction(self, + def evaluate_source_proxy_interaction( + self, actx: PyOpenCLArrayContext, places: GeometryCollection, pxy: ProxyClusterGeometryData, nbrindex: IndexList, *, - ibrow: int, ibcol: int) -> tuple[np.ndarray, TargetAndSourceClusterList]: - from pytential.collection import add_geometry_to_collection + ibrow: int, ibcol: int, + ) -> tuple[np.ndarray, TargetAndSourceClusterList]: + from pytential.linalg.utils import TargetAndSourceClusterList pxy_src_index = 
TargetAndSourceClusterList(pxy.pxyindex, pxy.srcindex) + + from pytential.collection import add_geometry_to_collection places = add_geometry_to_collection( places, {PROXY_SKELETONIZATION_TARGET: pxy.as_targets()} ) + if not self.weighted_sources: + logger.warning("Source-Proxy weighting is turned off. This will not give " + "good results for skeletonization.", stacklevel=3) + eval_mapper_cls = self.proxy_source_cluster_builder expr = self.source_proxy_exprs[ibrow] mat = self._evaluate_expr( @@ -348,13 +387,17 @@ def evaluate_source_proxy_interaction(self, return mat, pxy_src_index - def evaluate_target_proxy_interaction(self, + def evaluate_target_proxy_interaction( + self, actx: PyOpenCLArrayContext, places: GeometryCollection, pxy: ProxyClusterGeometryData, nbrindex: IndexList, *, - ibrow: int, ibcol: int) -> tuple[np.ndarray, TargetAndSourceClusterList]: - from pytential.collection import add_geometry_to_collection + ibrow: int, ibcol: int, + ) -> tuple[np.ndarray, TargetAndSourceClusterList]: + from pytential.linalg.utils import TargetAndSourceClusterList tgt_pxy_index = TargetAndSourceClusterList(pxy.srcindex, pxy.pxyindex) + + from pytential.collection import add_geometry_to_collection places = add_geometry_to_collection( places, {PROXY_SKELETONIZATION_SOURCE: pxy.as_sources()} ) @@ -371,6 +414,9 @@ def evaluate_target_proxy_interaction(self, mat = _apply_weights( actx, mat, places, tgt_pxy_index, nbrindex, self.domains[ibcol]) + else: + logger.warning("Target-Proxy weighting is turned off. This will not give " + "good results for skeletonization.", stacklevel=3) return mat, tgt_pxy_index @@ -541,7 +587,8 @@ def _evaluate_proxy_skeletonization_interaction( places: GeometryCollection, proxy_generator: ProxyGeneratorBase, wrangler: SkeletonizationWrangler, - cluster_index: IndexList, *, + source_index: IndexList, + target_index: IndexList, *, evaluate_proxy: Callable[..., tuple[np.ndarray, TargetAndSourceClusterList]], evaluate_neighbor: Callable[..., @@ -553,23 +600,24 @@ each cluster in *source_index*.
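+
+    Proxy points are generated around the clusters of *source_index*, while
+    the neighboring points are gathered from the points in *target_index*.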
""" - if cluster_index.nclusters == 1: + if source_index.nclusters == 1: raise ValueError("cannot make a proxy skeleton for a single cluster") - from pytential.linalg import gather_cluster_neighbor_points - pxy = proxy_generator(actx, dofdesc, cluster_index) + from pytential.linalg.proxy import gather_cluster_neighbor_points + pxy = proxy_generator(actx, dofdesc, source_index) nbrindex = gather_cluster_neighbor_points( - actx, pxy, + actx, pxy, target_index, max_particles_in_box=max_particles_in_box) pxymat, pxy_cluster_index = evaluate_proxy(actx, places, pxy, nbrindex) nbrmat, nbr_cluster_index = evaluate_neighbor(actx, places, pxy, nbrindex) - - return _ProxyNeighborEvaluationResult( + result = _ProxyNeighborEvaluationResult( pxy=pxy, pxymat=pxymat, pxyindex=pxy_cluster_index, nbrmat=nbrmat, nbrindex=nbr_cluster_index) + return result + def _skeletonize_block_by_proxy_with_mats( actx: PyOpenCLArrayContext, ibrow: int, ibcol: int, @@ -581,7 +629,7 @@ def _skeletonize_block_by_proxy_with_mats( id_rank: int | None = None, max_particles_in_box: int | None = None, rng: np.random.Generator | None = None, - ) -> "SkeletonizationResult": + ) -> SkeletonizationResult: nclusters = tgt_src_index.nclusters if nclusters == 1: raise ValueError("cannot make a proxy skeleton for a single cluster") @@ -595,7 +643,7 @@ def _skeletonize_block_by_proxy_with_mats( max_particles_in_box=max_particles_in_box) src_result = evaluate_skeletonization_interaction( - tgt_src_index.sources, + tgt_src_index.sources, tgt_src_index.targets, evaluate_proxy=partial( wrangler.evaluate_source_proxy_interaction, ibrow=ibrow, ibcol=ibcol), @@ -604,7 +652,7 @@ def _skeletonize_block_by_proxy_with_mats( ibrow=ibrow, ibcol=ibcol), ) tgt_result = evaluate_skeletonization_interaction( - tgt_src_index.targets, + tgt_src_index.targets, tgt_src_index.sources, evaluate_proxy=partial( wrangler.evaluate_target_proxy_interaction, ibrow=ibrow, ibcol=ibcol), @@ -613,8 +661,8 @@ def _skeletonize_block_by_proxy_with_mats( ibrow=ibrow, ibcol=ibcol) ) - src_skl_indices: np.ndarray = np.empty(nclusters, dtype=object) - tgt_skl_indices: np.ndarray = np.empty(nclusters, dtype=object) + skel_src_indices: np.ndarray = np.empty(nclusters, dtype=object) + skel_tgt_indices: np.ndarray = np.empty(nclusters, dtype=object) skel_starts: np.ndarray = np.zeros(nclusters + 1, dtype=np.int32) L: np.ndarray = np.empty(nclusters, dtype=object) @@ -643,7 +691,7 @@ def _skeletonize_block_by_proxy_with_mats( interp = interp[:k, :] L[i] = interp.T - tgt_skl_indices[i] = tgt_src_index.targets.cluster_indices(i)[idx[:k]] + skel_tgt_indices[i] = tgt_src_index.targets.cluster_indices(i)[idx[:k]] assert L[i].shape == (tgt_mat.shape[0], k) # skeletonize source points @@ -651,22 +699,49 @@ def _skeletonize_block_by_proxy_with_mats( assert 0 < k <= len(idx) R[i] = interp - src_skl_indices[i] = tgt_src_index.sources.cluster_indices(i)[idx[:k]] + skel_src_indices[i] = tgt_src_index.sources.cluster_indices(i)[idx[:k]] assert R[i].shape == (k, src_mat.shape[1]) skel_starts[i + 1] = skel_starts[i] + k - assert tgt_skl_indices[i].shape == src_skl_indices[i].shape + assert skel_tgt_indices[i].shape == skel_src_indices[i].shape + + # evaluate diagonal + from pytential.linalg.utils import make_flat_cluster_diag + mat = wrangler.evaluate_self(actx, places, tgt_src_index, ibrow, ibcol) + D = make_flat_cluster_diag(mat, tgt_src_index) - from pytential.linalg import make_index_list - src_skl_index = make_index_list(np.hstack(list(src_skl_indices)), skel_starts) - tgt_skl_index = 
make_index_list(np.hstack(list(tgt_skl_indices)), skel_starts) - skel_tgt_src_index = TargetAndSourceClusterList(tgt_skl_index, src_skl_index) + from pytential.linalg import TargetAndSourceClusterList, make_index_list + skel_src_index = make_index_list(np.hstack(list(skel_src_indices)), skel_starts) + skel_tgt_index = make_index_list(np.hstack(list(skel_tgt_indices)), skel_starts) + skel_tgt_src_index = TargetAndSourceClusterList(skel_tgt_index, skel_src_index) return SkeletonizationResult( - L=L, R=R, + L=L, R=R, D=D, tgt_src_index=tgt_src_index, skel_tgt_src_index=skel_tgt_src_index, _src_eval_result=src_result, _tgt_eval_result=tgt_result) + +def _evaluate_root( + actx: PyOpenCLArrayContext, ibrow: int, ibcol: int, + places: GeometryCollection, + wrangler: SkeletonizationWrangler, + tgt_src_index: TargetAndSourceClusterList + ) -> SkeletonizationResult: + assert tgt_src_index.nclusters == 1 + + from pytential.linalg.utils import make_flat_cluster_diag + mat = wrangler.evaluate_self(actx, places, tgt_src_index, ibrow, ibcol) + D = make_flat_cluster_diag(mat, tgt_src_index) + + from pytools.obj_array import make_obj_array + return SkeletonizationResult( + L=make_obj_array([np.eye(*D[0].shape)]), + R=make_obj_array([np.eye(*D[0].shape)]), + D=D, + tgt_src_index=tgt_src_index, skel_tgt_src_index=tgt_src_index, + _src_eval_result=None, _tgt_eval_result=None, + ) + # }}} @@ -705,21 +780,27 @@ class SkeletonizationResult: An object :class:`~numpy.ndarray` of size ``(nclusters,)``. + .. attribute:: D + + An object :class:`~numpy.ndarray` of size ``(nclusters,)``. This + contains the diagonal blocks for :attr:`tgt_src_index`. + .. attribute:: tgt_src_index - A :class:`~pytential.linalg.TargetAndSourceClusterList` representing the - indices in the original matrix :math:`A` that have been skeletonized. + A :class:`~pytential.linalg.utils.TargetAndSourceClusterList` representing + the indices in the original matrix :math:`A` that has been skeletonized. .. attribute:: skel_tgt_src_index - A :class:`~pytential.linalg.TargetAndSourceClusterList` representing a - subset of :attr:`tgt_src_index`, i.e. the skeleton of each cluster of + A :class:`~pytential.linalg.utils.TargetAndSourceClusterList` representing + a subset of :attr:`tgt_src_index`, i.e. the skeleton of each cluster of :math:`A`. These indices can be used to reconstruct the :math:`S` matrix. """ L: np.ndarray R: np.ndarray + D: np.ndarray tgt_src_index: TargetAndSourceClusterList skel_tgt_src_index: TargetAndSourceClusterList @@ -759,6 +840,7 @@ def skeletonize_by_proxy( input_exprs: sym.var | Sequence[sym.var], *, domains: Sequence[Hashable] | None = None, context: dict[str, Any] | None = None, + auto_where: Any = None, approx_nproxy: int | None = None, proxy_radius_factor: float | None = None, @@ -769,7 +851,7 @@ def skeletonize_by_proxy( max_particles_in_box: int | None = None) -> np.ndarray: r"""Evaluate and skeletonize a symbolic expression using proxy-based methods. - :arg tgt_src_index: a :class:`~pytential.linalg.TargetAndSourceClusterList` + :arg tgt_src_index: a :class:`~pytential.linalg.utils.TargetAndSourceClusterList` indicating which indices participate in the skeletonization. :arg exprs: see :func:`make_skeletonization_wrangler`. @@ -777,8 +859,8 @@ def skeletonize_by_proxy( :arg domains: see :func:`make_skeletonization_wrangler`. :arg context: see :func:`make_skeletonization_wrangler`. - :arg approx_nproxy: see :class:`~pytential.linalg.ProxyGenerator`. 
- :arg proxy_radius_factor: see :class:`~pytential.linalg.ProxyGenerator`. + :arg approx_nproxy: see :class:`~pytential.linalg.proxy.ProxyGenerator`. + :arg proxy_radius_factor: see :class:`~pytential.linalg.proxy.ProxyGenerator`. :arg id_eps: a floating point value used as a tolerance in skeletonizing each block in *tgt_src_index*. @@ -792,21 +874,105 @@ def skeletonize_by_proxy( from pytential.linalg.proxy import QBXProxyGenerator wrangler = make_skeletonization_wrangler( places, exprs, input_exprs, - domains=domains, context=context) + domains=domains, context=context, auto_where=auto_where) proxy = QBXProxyGenerator(places, approx_nproxy=approx_nproxy, radius_factor=proxy_radius_factor) + if wrangler.nrows != 1 or wrangler.ncols != 1: + raise NotImplementedError("support for block matrices") + + from itertools import product + skels: np.ndarray = np.empty((wrangler.nrows, wrangler.ncols), dtype=object) - for ibrow in range(wrangler.nrows): - for ibcol in range(wrangler.ncols): - skels[ibrow, ibcol] = _skeletonize_block_by_proxy_with_mats( - actx, ibrow, ibcol, places, proxy, wrangler, tgt_src_index, - id_eps=id_eps, - id_rank=id_rank, - max_particles_in_box=max_particles_in_box, - rng=rng) + for ibrow, ibcol in product(range(wrangler.nrows), range(wrangler.ncols)): + skels[ibrow, ibcol] = _skeletonize_block_by_proxy_with_mats( + actx, ibrow, ibcol, places, proxy, wrangler, tgt_src_index, + id_eps=id_eps, id_rank=id_rank, + max_particles_in_box=max_particles_in_box, + rng=rng) return skels # }}} + + +# {{{ recursive skeletonization by proxy + +def rec_skeletonize_by_proxy( + actx: PyOpenCLArrayContext, + places: GeometryCollection, + + ctree: ClusterTree, + tgt_src_index: TargetAndSourceClusterList, + exprs: sym.var | Sequence[sym.var], + input_exprs: sym.var | Sequence[sym.var], *, + domains: Sequence[Hashable] | None = None, + context: dict[str, Any] | None = None, + auto_where: Any = None, + + approx_nproxy: int | None = None, + proxy_radius_factor: float | None = None, + + id_eps: float | None = None, + rng: np.random.Generator | None = None, + max_particles_in_box: int | None = None, + + _wrangler: SkeletonizationWrangler | None = None, + _proxy: ProxyGeneratorBase | None = None) -> np.ndarray: + r"""Performs recursive skeletonization based on :func:`skeletonize_by_proxy`. + + :returns: an object :class:`~numpy.ndarray` of :class:`SkeletonizationResult`\ s, + one per level in *ctree*. + """ + + assert ctree.nclusters == tgt_src_index.nclusters + + if id_eps is None: + id_eps = 1.0e-8 + + if _proxy is None: + from pytential.linalg.proxy import QBXProxyGenerator + proxy: ProxyGeneratorBase = QBXProxyGenerator(places, + approx_nproxy=approx_nproxy, + radius_factor=proxy_radius_factor) + else: + proxy = _proxy + + if _wrangler is None: + wrangler = make_skeletonization_wrangler( + places, exprs, input_exprs, + domains=domains, context=context, auto_where=auto_where) + else: + wrangler = _wrangler + + if wrangler.nrows != 1 or wrangler.ncols != 1: + raise NotImplementedError("support for block matrices") + + from itertools import product + skel_per_level: np.ndarray = np.empty(ctree.nlevels, dtype=object) + for i, clevel in enumerate(ctree.levels[:-1]): + for ibrow, ibcol in product(range(wrangler.nrows), range(wrangler.ncols)): + skeleton = _skeletonize_block_by_proxy_with_mats( + actx, ibrow, ibcol, proxy.places, proxy, wrangler, tgt_src_index, + id_eps=id_eps, + # NOTE: we probably never want to set the rank here? 
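+                # (with only 'id_eps' given, the interpolative decomposition
+                # picks the rank of each cluster adaptively)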
+ id_rank=None, + rng=rng, + max_particles_in_box=max_particles_in_box) + + skel_per_level[i] = skeleton + tgt_src_index = cluster(skeleton.skel_tgt_src_index, clevel) + + assert tgt_src_index.nclusters == 1 + assert not isinstance(skel_per_level[-1], SkeletonizationResult) + + # evaluate the full root cluster (no skeletonization or anything) + skeleton = _evaluate_root(actx, 0, 0, places, wrangler, tgt_src_index) + skel_per_level[-1] = skeleton + + return skel_per_level + +# }}} + +# vim: foldmethod=marker diff --git a/pytential/linalg/utils.py b/pytential/linalg/utils.py index eca334568..ee64b1847 100644 --- a/pytential/linalg/utils.py +++ b/pytential/linalg/utils.py @@ -1,3 +1,6 @@ +from __future__ import annotations + + __copyright__ = "Copyright (C) 2018-2022 Alexandru Fikl" __license__ = """ @@ -34,16 +37,14 @@ __doc__ = """ -Misc -~~~~ - -.. currentmodule:: pytential.linalg - .. autoclass:: IndexList .. autoclass:: TargetAndSourceClusterList .. autofunction:: make_index_list .. autofunction:: make_index_cluster_cartesian_product +.. autofunction:: make_flat_cluster_diag + +.. autofunction:: interp_decomp """ @@ -54,6 +55,7 @@ class IndexList: """Convenience class for working with clusters (subsets) of an array. .. attribute:: nclusters + .. attribute:: nindices .. attribute:: indices An :class:`~numpy.ndarray` of not necessarily continuous or increasing @@ -78,6 +80,10 @@ class IndexList: def nclusters(self) -> int: return self.starts.size - 1 + @property + def nindices(self) -> int: + return self.indices.size + def cluster_size(self, i: int) -> int: if not (0 <= i < self.nclusters): raise IndexError( @@ -113,6 +119,11 @@ class TargetAndSourceClusterList: """Convenience class for working with clusters (subsets) of a matrix. .. attribute:: nclusters + + .. attribute:: shape + + Shape of the Cartesian product of the :attr:`targets` and :attr:`sources`. + .. attribute:: targets An :class:`IndexList` encapsulating target cluster indices. @@ -153,6 +164,13 @@ def _flat_cluster_starts(self): def _flat_total_size(self): return self._flat_cluster_starts[-1] + def __iter__(self): + return iter((self.targets, self.sources)) + + @property + def shape(self): + return (self.targets.nindices, self.sources.nindices) + def cluster_shape(self, i: int, j: int) -> tuple[int, int]: r""" :returns: the shape of the cluster ``(i, j)``, where *i* indexes into @@ -308,15 +326,14 @@ def make_flat_cluster_diag( correspondence to the index sets constructed by :func:`make_index_cluster_cartesian_product` for *mindex*. - :returns: a block diagonal object :class:`~numpy.ndarray`, where each - diagonal element :math:`(i, i)` is the reshaped slice of *mat* that - corresponds to the cluster :math:`i`. + :returns: an object :class:`~numpy.ndarray`, where each element represents + the block of a block-diagonal matrix. """ - cluster_mat: np.ndarray = np.full((mindex.nclusters, mindex.nclusters), 0, - dtype=object) + + cluster_mat: np.ndarray = np.empty(mindex.nclusters, dtype=object) for i in range(mindex.nclusters): shape = mindex.cluster_shape(i, i) - cluster_mat[i, i] = mindex.flat_cluster_take(mat, i).reshape(*shape) + cluster_mat[i] = mindex.flat_cluster_take(mat, i).reshape(*shape) return cluster_mat @@ -337,8 +354,8 @@ def interp_decomp( :return: a tuple ``(k, idx, interp)`` containing the numerical rank *k*, the column indices *idx* and the resulting interpolation matrix *interp*. 
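
    A sketch of the intended use, assuming the reconstruction convention of
    :mod:`scipy.linalg.interpolative`:

    .. code-block:: python

        k, idx, interp = interp_decomp(A, rank=None, eps=1.0e-8)
        # the k skeleton columns of A reconstruct it to the given tolerance
        A_approx = A[:, idx[:k]] @ interp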
""" - if rank is not None and eps is not None: - raise ValueError("providing both 'rank' and 'eps' is not supported") + if (rank is not None and eps is not None) or (rank is None and eps is None): + raise ValueError("either 'rank' or 'eps' must be provided (not both)") from scipy import __version__ @@ -366,7 +383,7 @@ def interp_decomp( # {{{ cluster matrix errors def cluster_skeletonization_error( - mat: np.ndarray, skeleton: "SkeletonizationResult", *, + mat: np.ndarray, skeleton: SkeletonizationResult, *, ord: float | None = None, relative: bool = False) -> tuple[np.ndarray, np.ndarray]: r"""Evaluate the cluster-wise skeletonization errors for the given *skeleton*. @@ -402,7 +419,7 @@ def cluster_skeletonization_error( tgt_src_index = skeleton.tgt_src_index nclusters = skeleton.nclusters - def mnorm(x: np.ndarray, y: np.ndarray) -> "np.floating[Any]": + def mnorm(x: np.ndarray, y: np.ndarray) -> np.floating[Any]: result = la.norm(x - y, ord=ord) if relative: result = result / la.norm(x, ord=ord) @@ -440,9 +457,9 @@ def mnorm(x: np.ndarray, y: np.ndarray) -> "np.floating[Any]": def skeletonization_error( - mat: np.ndarray, skeleton: "SkeletonizationResult", *, + mat: np.ndarray, skeleton: SkeletonizationResult, *, ord: float | None = None, - relative: bool = False) -> "np.floating[Any]": + relative: bool = False) -> np.floating[Any]: r"""Computes the skeletonization error for the entire matrix *mat*. The error computed here is given by @@ -499,3 +516,5 @@ def skeletonization_error( return result # }}} + +# vim: foldmethod=marker diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py index 99ca6a174..6ec13c496 100644 --- a/pytential/symbolic/matrix.py +++ b/pytential/symbolic/matrix.py @@ -284,7 +284,7 @@ def map_call(self, expr): class ClusterMatrixBuilderBase(MatrixBuilderBase): """Evaluate individual clusters of a matrix operator, as defined by a - :class:`~pytential.linalg.TargetAndSourceClusterList`. + :class:`~pytential.linalg.utils.TargetAndSourceClusterList`. Unlike, e.g. :class:`MatrixBuilder`, matrix cluster builders are significantly reduced in scope. They are basically just meant @@ -304,7 +304,8 @@ def __init__(self, tgt_src_index: TargetAndSourceClusterList, context: dict[str, Any]) -> None: """ - :arg tgt_src_index: a :class:`~pytential.linalg.TargetAndSourceClusterList` + :arg tgt_src_index: a + :class:`~pytential.linalg.utils.TargetAndSourceClusterList` class describing which clusters are going to be evaluated. 
""" diff --git a/test/extra_matrix_data.py b/test/extra_matrix_data.py index 7c2b8e5a6..d4ab820a6 100644 --- a/test/extra_matrix_data.py +++ b/test/extra_matrix_data.py @@ -53,8 +53,8 @@ def get_cluster_index(self, actx, places, dofdesc=None): if max_particles_in_box is None: max_particles_in_box = discr.ndofs // self.approx_cluster_count - from pytential.linalg import partition_by_nodes - cindex = partition_by_nodes(actx, places, + from pytential.linalg.cluster import partition_by_nodes + cindex, ctree = partition_by_nodes(actx, places, dofdesc=dofdesc, tree_kind=self.tree_kind, max_particles_in_box=max_particles_in_box) @@ -76,12 +76,12 @@ def get_cluster_index(self, actx, places, dofdesc=None): from pytential.linalg import make_index_list cindex = make_index_list(subset) - return cindex + return cindex, ctree def get_tgt_src_cluster_index(self, actx, places, dofdesc=None): from pytential.linalg import TargetAndSourceClusterList - cindex = self.get_cluster_index(actx, places, dofdesc=dofdesc) - return TargetAndSourceClusterList(cindex, cindex) + cindex, ctree = self.get_cluster_index(actx, places, dofdesc=dofdesc) + return TargetAndSourceClusterList(cindex, cindex), ctree def get_operator(self, ambient_dim, qbx_forced_limit=_NoArgSentinel): knl = self.knl_class(ambient_dim) diff --git a/test/test_linalg_cluster.py b/test/test_linalg_cluster.py new file mode 100644 index 000000000..ed8b2045e --- /dev/null +++ b/test/test_linalg_cluster.py @@ -0,0 +1,129 @@ +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +import pytest + +import numpy as np + +from pytential import GeometryCollection +from meshmode.mesh.generation import NArmedStarfish + +from meshmode import _acf # noqa: F401 +from arraycontext import pytest_generate_tests_for_array_contexts +from meshmode.array_context import PytestPyOpenCLArrayContextFactory + +import extra_matrix_data as extra +import logging +logger = logging.getLogger(__name__) + +pytest_generate_tests = pytest_generate_tests_for_array_contexts([ + PytestPyOpenCLArrayContextFactory, + ]) + +CLUSTER_TEST_CASES = [ + extra.CurveTestCase( + name="starfish", + target_order=4, + curve_fn=NArmedStarfish(5, 0.25), + resolutions=[64]), + extra.TorusTestCase( + target_order=4, + resolutions=[1]) + ] + + +# {{{ test_cluster_tree + +@pytest.mark.parametrize(("case", "tree_kind"), [ + (CLUSTER_TEST_CASES[0], None), + (CLUSTER_TEST_CASES[0], "adaptive"), + (CLUSTER_TEST_CASES[0], "adaptive-level-restricted"), + (CLUSTER_TEST_CASES[1], "adaptive"), + ]) +def test_cluster_tree(actx_factory, case, tree_kind, visualize=False): + if visualize: + logging.basicConfig(level=logging.INFO) + + from dataclasses import replace + actx = actx_factory() + case = replace(case, tree_kind=tree_kind) + logger.info("\n%s", case) + + discr = case.get_discretization(actx, case.resolutions[-1], case.target_order) + places = GeometryCollection(discr, auto_where=case.name) + + srcindex, ctree = case.get_cluster_index(actx, places) + assert srcindex.nclusters == ctree.nclusters + + from pytential.linalg.cluster import split_array + rng = np.random.default_rng(42) + x = split_array(rng.random(srcindex.indices.shape), srcindex) + + logger.info("nclusters %4d nlevels %4d", srcindex.nclusters, ctree.nlevels) + + if visualize and ctree._tree is not None: + import matplotlib.pyplot as plt + fig = plt.figure(figsize=(10, 10), dpi=300) + + from boxtree.visualization import TreePlotter + plotter = TreePlotter(ctree._tree) + plotter.draw_tree(fill=False, edgecolor="black", zorder=10) + plotter.draw_box_numbers() + plotter.set_bounding_box() + + fig.savefig("test_cluster_tree") + + from pytential.linalg.cluster import cluster, uncluster + for clevel in ctree.levels: + logger.info("======== Level %d", clevel.level) + logger.info("box_ids %s", clevel.box_ids) + logger.info("sizes %s", np.diff(srcindex.starts)) + logger.info("parent_map %s", clevel.parent_map) + + assert srcindex.nclusters == clevel.nclusters + + next_srcindex = cluster(srcindex, clevel) + for i, ppm in enumerate(clevel.parent_map): + partition = np.concatenate([srcindex.cluster_indices(j) for j in ppm]) + + assert partition.size == next_srcindex.cluster_size(i) + assert np.allclose(partition, next_srcindex.cluster_indices(i)) + + y = cluster(x, clevel) + z = uncluster(y, srcindex, clevel) + assert all(np.allclose(xi, zi) for xi, zi in zip(x, z, strict=True)) + + srcindex = next_srcindex + x = y + +# }}} + + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + exec(sys.argv[1]) + else: + from pytest import main + main([__file__]) + +# vim: fdm=marker diff --git a/test/test_linalg_proxy.py b/test/test_linalg_proxy.py index 99029b63d..cb1184896 100644 --- a/test/test_linalg_proxy.py +++ b/test/test_linalg_proxy.py @@ -29,7 +29,7 @@ from arraycontext import flatten, unflatten from pytential import bind, sym from pytential import GeometryCollection -from pytential.linalg import ProxyGenerator, QBXProxyGenerator +from pytential.linalg.proxy import ProxyGenerator, QBXProxyGenerator from meshmode.mesh.generation import ellipse, 
NArmedStarfish from meshmode import _acf # noqa: F401 @@ -214,7 +214,7 @@ def test_partition_points(actx_factory, tree_kind, case, visualize=False): places = GeometryCollection(qbx, auto_where=case.name) density_discr = places.get_discretization(case.name) - mindex = case.get_cluster_index(actx, places) + mindex, _ = case.get_cluster_index(actx, places) expected_indices = np.arange(0, density_discr.ndofs) assert mindex.starts[-1] == density_discr.ndofs @@ -261,7 +261,7 @@ def test_proxy_generator(actx_factory, case, places = GeometryCollection(qbx, auto_where=case.name) density_discr = places.get_discretization(case.name) - cindex = case.get_cluster_index(actx, places) + cindex, _ = case.get_cluster_index(actx, places) generator = proxy_generator_cls(places, approx_nproxy=case.proxy_approx_count, @@ -307,7 +307,7 @@ def test_proxy_generator(actx_factory, case, ProxyGenerator, QBXProxyGenerator, ]) @pytest.mark.parametrize("index_sparsity_factor", [1.0, 0.6]) -@pytest.mark.parametrize("proxy_radius_factor", [1, 1.1]) +@pytest.mark.parametrize("proxy_radius_factor", [1.0, 1.1]) def test_neighbor_points(actx_factory, case, proxy_generator_cls, index_sparsity_factor, proxy_radius_factor, visualize=False): @@ -330,7 +330,7 @@ def test_neighbor_points(actx_factory, case, dofdesc = places.auto_source density_discr = places.get_discretization(dofdesc.geometry) - srcindex = case.get_cluster_index(actx, places) + srcindex, _ = case.get_cluster_index(actx, places) # generate proxy points generator = proxy_generator_cls(places, @@ -339,8 +339,8 @@ def test_neighbor_points(actx_factory, case, pxy = generator(actx, dofdesc, srcindex) # get neighboring points - from pytential.linalg import gather_cluster_neighbor_points - nbrindex = gather_cluster_neighbor_points(actx, pxy) + from pytential.linalg.proxy import gather_cluster_neighbor_points + nbrindex = gather_cluster_neighbor_points(actx, pxy, srcindex) pxy = pxy.to_numpy(actx) nodes = actx.to_numpy( diff --git a/test/test_linalg_skeletonization.py b/test/test_linalg_skeletonization.py index bc61e3986..89e0eb091 100644 --- a/test/test_linalg_skeletonization.py +++ b/test/test_linalg_skeletonization.py @@ -107,6 +107,7 @@ def test_skeletonize_symbolic(actx_factory, case, visualize=False): """ actx = actx_factory() + rng = np.random.default_rng(42) if visualize: logging.basicConfig(level=logging.INFO) @@ -119,40 +120,21 @@ def test_skeletonize_symbolic(actx_factory, case, visualize=False): places = GeometryCollection(qbx, auto_where=dd) density_discr = places.get_discretization(dd.geometry, dd.discr_stage) - tgt_src_index = case.get_tgt_src_cluster_index(actx, places, dd) + tgt_src_index, ctree = case.get_tgt_src_cluster_index(actx, places, dd) logger.info("nclusters %3d ndofs %7d", tgt_src_index.nclusters, density_discr.ndofs) # }}} - # {{{ wranglers - - from pytential.linalg import QBXProxyGenerator - from pytential.linalg.skeletonization import make_skeletonization_wrangler - proxy_generator = QBXProxyGenerator(places, - radius_factor=case.proxy_radius_factor, - approx_nproxy=case.proxy_approx_count) - + from pytential.linalg.skeletonization import rec_skeletonize_by_proxy sym_u, sym_op = case.get_operator(places.ambient_dim) - wrangler = make_skeletonization_wrangler(places, sym_op, sym_u, - domains=None, - context=case.knl_concrete_kwargs, - _weighted_proxy=case.weighted_proxy, - _proxy_source_cluster_builder=case.proxy_source_cluster_builder, - _proxy_target_cluster_builder=case.proxy_target_cluster_builder, - 
_neighbor_cluster_builder=case.neighbor_cluster_builder) - - # }}} - - from pytential.linalg.skeletonization import ( - _skeletonize_block_by_proxy_with_mats) - - rng = np.random.default_rng(42) - _skeletonize_block_by_proxy_with_mats( - actx, 0, 0, places, proxy_generator, wrangler, tgt_src_index, + rec_skeletonize_by_proxy( + actx, places, ctree, tgt_src_index, sym_op, sym_u, + context=case.knl_concrete_kwargs, + auto_where=dd, id_eps=1.0e-8, - rng=rng, + rng=rng ) # }}} @@ -165,6 +147,7 @@ def run_skeletonize_by_proxy(actx, case, resolution, places=None, mat=None, ctol=None, rtol=None, rng=None, + tgt_src_index=None, suffix="", visualize=False): from pytools import ProcessTimer @@ -174,9 +157,12 @@ def run_skeletonize_by_proxy(actx, case, resolution, if places is None: qbx = case.get_layer_potential(actx, resolution, case.target_order) places = GeometryCollection(qbx, auto_where=dd) + else: + qbx = places.get_geometry(dd.geometry) density_discr = places.get_discretization(dd.geometry, dd.discr_stage) - tgt_src_index = case.get_tgt_src_cluster_index(actx, places, dd) + if tgt_src_index is None: + tgt_src_index, _ = case.get_tgt_src_cluster_index(actx, places, dd) logger.info("nclusters %3d ndofs %7d", tgt_src_index.nclusters, density_discr.ndofs) @@ -193,7 +179,7 @@ def run_skeletonize_by_proxy(actx, case, resolution, logger.info("proxy factor %.2f count %7d", case.proxy_radius_factor, proxy_approx_count) - from pytential.linalg import QBXProxyGenerator + from pytential.linalg.proxy import QBXProxyGenerator from pytential.linalg.skeletonization import make_skeletonization_wrangler proxy_generator = QBXProxyGenerator(places, radius_factor=case.proxy_radius_factor, @@ -201,8 +187,8 @@ def run_skeletonize_by_proxy(actx, case, resolution, sym_u, sym_op = case.get_operator(places.ambient_dim) wrangler = make_skeletonization_wrangler(places, sym_op, sym_u, - domains=None, context=case.knl_concrete_kwargs, + auto_where=dd, _weighted_proxy=case.weighted_proxy, _proxy_source_cluster_builder=case.proxy_source_cluster_builder, _proxy_target_cluster_builder=case.proxy_target_cluster_builder, @@ -247,10 +233,13 @@ def run_skeletonize_by_proxy(actx, case, resolution, logger.info("[time] skeletonization by proxy: %s", p) + def intersect1d(x, y): + return np.where((x.reshape(1, -1) - y.reshape(-1, 1)) == 0)[1] + L, R = skeleton.L, skeleton.R for i in range(tgt_src_index.nclusters): # targets (rows) - bi = np.searchsorted( + bi = intersect1d( tgt_src_index.targets.cluster_indices(i), skeleton.skel_tgt_src_index.targets.cluster_indices(i), ) @@ -260,7 +249,7 @@ def run_skeletonize_by_proxy(actx, case, resolution, tgt_error = la.norm(A - L[i] @ S, ord=ord) / la.norm(A, ord=ord) # sources (columns) - bj = np.searchsorted( + bj = intersect1d( tgt_src_index.sources.cluster_indices(i), skeleton.skel_tgt_src_index.sources.cluster_indices(i), ) @@ -274,8 +263,8 @@ def run_skeletonize_by_proxy(actx, case, resolution, src_error, tgt_error, R[i].shape[0], R[i].shape[1]) if ctol is not None: - assert src_error < ctol * case.id_eps - assert tgt_error < ctol * case.id_eps + assert src_error < ctol + assert tgt_error < ctol # }}} @@ -306,9 +295,9 @@ def run_skeletonize_by_proxy(actx, case, resolution, rtol if rtol is not None else 0.0) if rtol: - assert err_l < rtol * case.id_eps - assert err_r < rtol * case.id_eps - assert err_f < rtol * case.id_eps + assert err_l < rtol + assert err_r < rtol + assert err_f < rtol # }}} @@ -348,19 +337,18 @@ def run_skeletonize_by_proxy(actx, case, resolution, # }}} - return err_f, 
(places, mat) + return err_f, (places, mat, skeleton) @pytest.mark.parametrize("case", [ - # NOTE: skip 2d tests, since they're better checked for convergence in - # `test_skeletonize_by_proxy_convergence` - # SKELETONIZE_TEST_CASES[0], SKELETONIZE_TEST_CASES[1], + SKELETONIZE_TEST_CASES[0], + SKELETONIZE_TEST_CASES[1], SKELETONIZE_TEST_CASES[2], ]) def test_skeletonize_by_proxy(actx_factory, case, visualize=False): - r"""Test single-level skeletonization accuracy. Checks that the error - satisfies :math:`e < c \epsilon_{id}` for a fixed ID tolerance and an - empirically determined (not too huge) :math:`c`. + r"""Test multilevel skeletonization accuracy. Checks that the error for + every level satisfies :math:`e < c \epsilon_{id}` for a fixed ID tolerance + and an empirically determined (not too huge) :math:`c`. """ import scipy.linalg.interpolative as sli @@ -376,13 +364,27 @@ def test_skeletonize_by_proxy(actx_factory, case, visualize=False): case = replace(case, approx_cluster_count=6, id_eps=1.0e-8) logger.info("\n%s", case) - run_skeletonize_by_proxy( - actx, case, case.resolutions[0], - ctol=10, - # FIXME: why is the 3D error so large? - rtol=10**case.ambient_dim, - rng=rng, - visualize=visualize) + dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage) + qbx = case.get_layer_potential(actx, case.resolutions[0], case.target_order) + places = GeometryCollection(qbx, auto_where=dd) + + tgt_src_index, ctree = case.get_tgt_src_cluster_index(actx, places, dd) + mat = None + + from pytential.linalg.cluster import cluster + for clevel in ctree.levels[:-1]: + logger.info("[%2d/%2d] nclusters %3d", + clevel.level, ctree.nlevels, clevel.nclusters) + + _, (_, mat, skeleton) = run_skeletonize_by_proxy( + actx, case, case.resolutions[0], + ctol=10 * case.id_eps, + # FIXME: why is the 3D error so large? 
+ rtol=10**case.ambient_dim * case.id_eps, + places=places, mat=mat, rng=rng, tgt_src_index=tgt_src_index, + visualize=visualize) + + tgt_src_index = cluster(skeleton.skel_tgt_src_index, clevel) # }}} @@ -448,9 +450,11 @@ def test_skeletonize_by_proxy_convergence( places = mat = None for i in range(id_eps.size): case = replace(case, id_eps=id_eps[i], weighted_proxy=weighted) - rec_error[i], (places, mat) = run_skeletonize_by_proxy( - actx, case, r, places=places, mat=mat, - suffix=f"{suffix}_{i:04d}", rng=rng, visualize=False) + + if not was_zero: + rec_error[i], (places, mat, _) = run_skeletonize_by_proxy( + actx, case, r, places=places, mat=mat, rng=rng, + suffix=f"{suffix}_{i:04d}", visualize=False) was_zero = rec_error[i] == 0.0 eoc.add_data_point(id_eps[i], rec_error[i]) diff --git a/test/test_matrix.py b/test/test_matrix.py index 905ae70bf..4d9c416bb 100644 --- a/test/test_matrix.py +++ b/test/test_matrix.py @@ -61,7 +61,7 @@ def max_cluster_error(mat, clusters, mindex, p=None): error = max( error, - la.norm(mat_i - clusters[i, i], ord=p) / norm_mat_i + la.norm(mat_i - clusters[i], ord=p) / norm_mat_i ) return error @@ -346,7 +346,7 @@ def test_cluster_builder(actx_factory, ambient_dim, # {{{ matrix - mindex = case.get_tgt_src_cluster_index(actx, places) + mindex, _ = case.get_tgt_src_cluster_index(actx, places) kwargs = { "dep_expr": sym_u, "other_dep_exprs": [], @@ -468,8 +468,8 @@ def test_build_matrix_fixed_stage(actx_factory, logger.info("ndofs: %d", target_discr.ndofs) from pytential.linalg import TargetAndSourceClusterList - itargets = case.get_cluster_index(actx, places, target_dd) - jsources = case.get_cluster_index(actx, places, source_dd) + itargets, _ = case.get_cluster_index(actx, places, target_dd) + jsources, _ = case.get_cluster_index(actx, places, source_dd) mindex = TargetAndSourceClusterList(itargets, jsources) kwargs = { From 81ef190bbd60bc21cda7d210703b5dad717607af Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Fri, 17 Jun 2022 15:10:34 +0300 Subject: [PATCH 2/4] add recursive clustering and skeletonization --- pytential/linalg/skeletonization.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pytential/linalg/skeletonization.py b/pytential/linalg/skeletonization.py index b57db6975..583500c19 100644 --- a/pytential/linalg/skeletonization.py +++ b/pytential/linalg/skeletonization.py @@ -915,8 +915,8 @@ def rec_skeletonize_by_proxy( proxy_radius_factor: float | None = None, id_eps: float | None = None, - rng: np.random.Generator | None = None, max_particles_in_box: int | None = None, + rng: np.random.Generator | None = None, _wrangler: SkeletonizationWrangler | None = None, _proxy: ProxyGeneratorBase | None = None) -> np.ndarray: @@ -961,7 +961,7 @@ def rec_skeletonize_by_proxy( rng=rng, max_particles_in_box=max_particles_in_box) - skel_per_level[i] = skeleton + skel_per_level[i] = skeleton # type: ignore[call-overload] tgt_src_index = cluster(skeleton.skel_tgt_src_index, clevel) assert tgt_src_index.nclusters == 1 @@ -969,7 +969,7 @@ def rec_skeletonize_by_proxy( # evaluate the full root cluster (no skeletonization or anything) skeleton = _evaluate_root(actx, 0, 0, places, wrangler, tgt_src_index) - skel_per_level[-1] = skeleton + skel_per_level[-1] = skeleton # type: ignore[call-overload] return skel_per_level From e40ef4c2048eb3992efbf426356d062ab8b53f9d Mon Sep 17 00:00:00 2001 From: Alexandru Fikl Date: Fri, 17 Jun 2022 15:10:34 +0300 Subject: [PATCH 3/4] add recursive clustering and skeletonization --- pytential/linalg/cluster.py 
 pytential/linalg/cluster.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pytential/linalg/cluster.py b/pytential/linalg/cluster.py
index 7a90e2cac..fce5c2a26 100644
--- a/pytential/linalg/cluster.py
+++ b/pytential/linalg/cluster.py
@@ -296,7 +296,6 @@ def uncluster(ary: np.ndarray, index: IndexList, clevel: ClusterLevel) -> np.nda

 def _build_binary_ish_tree_from_starts(starts: np.ndarray) -> ClusterTree:
     partition_box_ids: np.ndarray[tuple[int, ...], Any] = np.arange(starts.size - 1)
-
     box_ids = partition_box_ids

     box_parent_ids = []

From bf42a83f335715be76740b58e7e7d36eff2e3c53 Mon Sep 17 00:00:00 2001
From: Alexandru Fikl
Date: Thu, 4 Aug 2022 16:21:49 +0300
Subject: [PATCH 4/4] implement hierarchical matrix solver

---
 .gitignore                                 |   2 +
 examples/scaling-study-hmatrix.py          | 198 +++
 pytential/linalg/direct_solver_symbolic.py |   8 +-
 pytential/linalg/hmatrix.py                | 547 +++++++++++++++++++
 pytential/linalg/skeletonization.py        |  42 +-
 pytential/linalg/utils.py                  |  63 +++
 pytential/symbolic/matrix.py               |  18 +-
 test/extra_matrix_data.py                  |  21 +-
 test/test_linalg_hmatrix.py                | 590 +++++++++++++++++++++
 test/test_linalg_skeletonization.py        | 101 ++++
 10 files changed, 1561 insertions(+), 29 deletions(-)
 create mode 100644 examples/scaling-study-hmatrix.py
 create mode 100644 pytential/linalg/hmatrix.py
 create mode 100644 test/test_linalg_hmatrix.py

diff --git a/.gitignore b/.gitignore
index daf719e6f..c89b827fb 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,8 @@ examples/*.pdf

 *.vtu
 *.vts
+*.png
+*.gif

 .cache

diff --git a/examples/scaling-study-hmatrix.py b/examples/scaling-study-hmatrix.py
new file mode 100644
index 000000000..d163e7383
--- /dev/null
+++ b/examples/scaling-study-hmatrix.py
@@ -0,0 +1,198 @@
+__copyright__ = "Copyright (C) 2022 Alexandru Fikl"
+
+__license__ = """
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+""" + +from dataclasses import dataclass + +import numpy as np + +from meshmode.array_context import PyOpenCLArrayContext +from pytools.convergence import EOCRecorder + +from pytential import GeometryCollection, sym + +import logging + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +@dataclass(frozen=True) +class Timings: + build: float + matvec: float + + +def run_hmatrix_matvec( + actx: PyOpenCLArrayContext, + places: GeometryCollection, *, + dofdesc: sym.DOFDescriptor) -> None: + from sumpy.kernel import LaplaceKernel + kernel = LaplaceKernel(places.ambient_dim) + sym_u = sym.var("u") + sym_op = -0.5 * sym_u + sym.D(kernel, sym_u, qbx_forced_limit="avg") + + density_discr = places.get_discretization(dofdesc.geometry, dofdesc.discr_stage) + u = actx.thaw(density_discr.nodes()[0]) + + def build_hmat(): + from pytential.linalg.hmatrix import build_hmatrix_by_proxy + return build_hmatrix_by_proxy( + actx, places, sym_op, sym_u, + domains=[dofdesc], + context={}, + auto_where=dofdesc, + id_eps=1.0e-10, + _tree_kind="adaptive-level-restricted", + _approx_nproxy=64, + _proxy_radius_factor=1.15).get_forward() + + # warmup + from pytools import ProcessTimer + with ProcessTimer() as pt: + hmat = build_hmat() + actx.queue.finish() + + logger.info("build(warmup): %s", pt) + + # build + with ProcessTimer() as pt: + hmat = build_hmat() + actx.queue.finish() + + t_build = pt.wall_elapsed + logger.info("build: %s", pt) + + # matvec + with ProcessTimer() as pt: + du = hmat @ u + assert du is not None + actx.queue.finish() + + t_matvec = pt.wall_elapsed + logger.info("matvec: %s", pt) + + return Timings(t_build, t_matvec) + + +def run_scaling_study( + ambient_dim: int, *, + target_order: int = 4, + source_ovsmp: int = 4, + qbx_order: int = 4, + ) -> None: + dd = sym.DOFDescriptor(f"d{ambient_dim}", discr_stage=sym.QBX_SOURCE_STAGE2) + + import pyopencl as cl + ctx = cl.create_some_context() + queue = cl.CommandQueue(ctx) + actx = PyOpenCLArrayContext(queue, force_device_scalars=True) + + eoc_build = EOCRecorder() + eoc_matvec = EOCRecorder() + + import meshmode.mesh.generation as mgen + import meshmode.discretization.poly_element as mpoly + resolutions = [64, 128, 256, 512, 1024, 1536, 2048, 2560, 3072] + + for n in resolutions: + mesh = mgen.make_curve_mesh( + mgen.NArmedStarfish(5, 0.25), + np.linspace(0, 1, n), + order=target_order) + + from meshmode.discretization import Discretization + pre_density_discr = Discretization(actx, mesh, + mpoly.InterpolatoryQuadratureGroupFactory(target_order)) + + from pytential.qbx import QBXLayerPotentialSource + qbx = QBXLayerPotentialSource( + pre_density_discr, + fine_order=source_ovsmp * target_order, + qbx_order=qbx_order, + fmm_order=False, fmm_backend=None, + ) + places = GeometryCollection(qbx, auto_where=dd.geometry) + density_discr = places.get_discretization(dd.geometry, dd.discr_stage) + + logger.info("ndofs: %d", density_discr.ndofs) + logger.info("nelements: %d", density_discr.mesh.nelements) + + timings = run_hmatrix_matvec(actx, places, dofdesc=dd) + eoc_build.add_data_point(density_discr.ndofs, timings.build) + eoc_matvec.add_data_point(density_discr.ndofs, timings.matvec) + + for name, eoc in [("build", eoc_build), ("matvec", eoc_matvec)]: + logger.info("%s\n%s", + name, eoc.pretty_print( + abscissa_label="dofs", + error_label=f"{name} (s)", + abscissa_format="%d", + error_format="%.3fs", + eoc_format="%.2f", + ) + ) + visualize_eoc(f"scaling-study-hmatrix-{name}", eoc, 1) + + +def visualize_eoc( + filename: str, 
eoc: EOCRecorder, order: int, + overwrite: bool = False) -> None: + try: + import matplotlib.pyplot as plt + except ImportError: + logger.info("matplotlib not available for plotting") + return + + fig = plt.figure(figsize=(10, 10), dpi=300) + ax = fig.gca() + + h, error = np.array(eoc.history).T # type: ignore[no-untyped-call] + ax.loglog(h, error, "o-") + + max_h = np.max(h) + min_e = np.min(error) + max_e = np.max(error) + min_h = np.exp(np.log(max_h) + np.log(min_e / max_e) / order) + + ax.loglog( + [max_h, min_h], [max_e, min_e], "k-", label=rf"$\mathcal{{O}}(h^{order})$" + ) + + # }}} + + ax.grid(True, which="major", linestyle="-", alpha=0.75) + ax.grid(True, which="minor", linestyle="--", alpha=0.5) + + ax.set_xlabel("$N$") + ax.set_ylabel("$T~(s)$") + + import pathlib + filename = pathlib.Path(filename) + if not overwrite and filename.exists(): + raise FileExistsError(f"output file '{filename}' already exists") + + fig.savefig(filename) + plt.close(fig) + + +if __name__ == "__main__": + run_scaling_study(ambient_dim=2) diff --git a/pytential/linalg/direct_solver_symbolic.py b/pytential/linalg/direct_solver_symbolic.py index 118b94597..2902ee2c2 100644 --- a/pytential/linalg/direct_solver_symbolic.py +++ b/pytential/linalg/direct_solver_symbolic.py @@ -54,12 +54,16 @@ def prepare_expr(places, exprs, auto_where=None): return _prepare_expr(places, exprs, auto_where=auto_where) -def prepare_proxy_expr(places, exprs, auto_where=None): +def prepare_proxy_expr( + places, exprs, + auto_where=None, + remove_transforms: bool = True): def _prepare_expr(expr): # remove all diagonal / non-operator terms in the expression expr = IntGTermCollector()(expr) # ensure all IntGs remove all the kernel derivatives - expr = KernelTransformationRemover()(expr) + if remove_transforms: + expr = KernelTransformationRemover()(expr) # ensure all IntGs have their source and targets set expr = DOFDescriptorReplacer(auto_where[0], auto_where[1])(expr) diff --git a/pytential/linalg/hmatrix.py b/pytential/linalg/hmatrix.py new file mode 100644 index 000000000..6dce97c47 --- /dev/null +++ b/pytential/linalg/hmatrix.py @@ -0,0 +1,547 @@ +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +from collections.abc import Callable, Sequence +from dataclasses import dataclass +from typing import Any + +import numpy as np +import numpy.linalg as la +from scipy.sparse.linalg import LinearOperator + +from arraycontext import PyOpenCLArrayContext, ArrayOrContainerT, flatten, unflatten +from meshmode.dof_array import DOFArray + +from pytential import GeometryCollection, sym +from pytential.linalg.proxy import ProxyGeneratorBase +from pytential.linalg.cluster import ClusterTree, ClusterLevel +from pytential.linalg.skeletonization import ( + SkeletonizationWrangler, SkeletonizationResult) + +import logging +logger = logging.getLogger(__name__) + + +__doc__ = """ +Hierarical Matrix Construction +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +.. autoclass:: ProxyHierarchicalMatrixWrangler +.. autoclass:: ProxyHierarchicalMatrix +.. autoclass:: ProxyHierarchicalForwardMatrix +.. autoclass:: ProxyHierarchicalBackwardMatrix + +.. autofunction:: build_hmatrix_by_proxy +""" + + +# {{{ error model + +def hmatrix_error_from_param( + ambient_dim: int, + *, + id_eps: float, + id_rank: int, + min_proxy_radius: float, + max_cluster_radius: float, + nproxies: int, + nsources: int, + ntargets: int, c: float = 1.0e-3) -> float: + if ambient_dim == 2: + p = int(0.5 * id_rank) + elif ambient_dim == 3: + p = int((np.sqrt(1 + 4 * id_rank) - 1) / 2) + else: + raise ValueError(f"unsupported ambient dimension: '{ambient_dim}'") + + rho = alpha = max_cluster_radius / min_proxy_radius + return ( + c * rho ** (p + 1) / (1 - rho) + + np.sqrt(nsources / nproxies) + * (1 - alpha ** (p + 1)) / (1 - alpha) * id_eps + ) + +# }}} + + +# {{{ update diagonals + +def _update_skeleton_diagonal( + skeleton: SkeletonizationResult, + parent: SkeletonizationResult | None, + clevel: ClusterLevel | None, + diagonal: np.ndarray | None = None) -> SkeletonizationResult: + """Due to the evaluation in :func:`_skeletonize_block_by_proxy_with_mats`, + the diagonal matrix in *skeleton* also contains the indices from its + parent. In particular, at a level :math:`l` we need the diagonal block:: + + 0 D_{i, j + 1} D_{i, j + 2} + D_{i + 1, j} 0 D_{i + 1, j + 2} + D_{i + 2, j} D_{i + 2, j + 1} 0 + + but the version in *skeleton* also fills in the 0 blocks in there. This + routine goes through them and zeros them out. + """ + + if clevel is None: + return skeleton + + assert parent is not None + assert skeleton.tgt_src_index.shape == parent.skel_tgt_src_index.shape + + if diagonal is None: + diagonal = np.zeros(parent.nclusters) + + from numbers import Number + if isinstance(diagonal, Number): + diagonal = np.full(parent.nclusters, diagonal) + + assert diagonal.size == parent.nclusters + targets, sources = parent.skel_tgt_src_index + + # FIXME: nicer way to do this? 
+ mat: np.ndarray = np.empty(skeleton.nclusters, dtype=object) + for k in range(skeleton.nclusters): + D = skeleton.D[k].copy() + + i = j = 0 + for icluster in clevel.parent_map[k]: + di = targets.cluster_size(icluster) + dj = sources.cluster_size(icluster) + D[np.s_[i:i + di], np.s_[j:j + dj]] = diagonal[icluster] + + i += di + j += dj + + assert D.shape == (i, j) + mat[k] = D + + from dataclasses import replace + return replace(skeleton, D=mat) + + +def _update_skeletons_diagonal( + wrangler: "ProxyHierarchicalMatrixWrangler", + func: Callable[[SkeletonizationResult], np.ndarray | None], + ) -> np.ndarray: + skeletons: np.ndarray = np.empty(wrangler.skeletons.shape, dtype=object) + skeletons[0] = wrangler.skeletons[0] + + for i in range(1, wrangler.ctree.nlevels): + skeletons[i] = _update_skeleton_diagonal( + wrangler.skeletons[i], + wrangler.skeletons[i - 1], + wrangler.ctree.levels[i - 1], + diagonal=func(skeletons[i - 1])) + + return skeletons + +# }}} + + +# {{{ ProxyHierarchicalMatrix + +@dataclass(frozen=True) +class ProxyHierarchicalMatrixWrangler: + """ + .. automethod:: get_forward + .. automethod:: get_backward + """ + + wrangler: SkeletonizationWrangler + proxy: ProxyGeneratorBase + ctree: ClusterTree + skeletons: np.ndarray + + def get_forward(self) -> "ProxyHierarchicalForwardMatrix": + return ProxyHierarchicalForwardMatrix( + ctree=self.ctree, + skeletons=_update_skeletons_diagonal(self, lambda s: None), + ) + + def get_backward(self) -> "ProxyHierarchicalBackwardMatrix": + return ProxyHierarchicalBackwardMatrix( + ctree=self.ctree, + skeletons=_update_skeletons_diagonal(self, lambda s: s.Dhat)) + + +@dataclass(frozen=True) +class ProxyHierarchicalMatrix(LinearOperator): + """ + .. attribute:: skeletons + + An :class:`~numpy.ndarray` containing skeletonization information + for each level of the hierarchy. For additional details, see + :class:`~pytential.linalg.skeletonization.SkeletonizationResult`. + + This class implements the :class:`scipy.sparse.linalg.LinearOperator` + interface. In particular, the following attributes and methods: + + .. attribute:: shape + + A :class:`tuple` that gives the matrix size ``(m, n)``. + + .. attribute:: dtype + + The data type of the matrix entries. + + .. automethod:: matvec + .. automethod:: __matmul__ + """ + + ctree: ClusterTree + skeletons: np.ndarray + + @property + def shape(self): + return self.skeletons[0].tgt_src_index.shape + + @property + def dtype(self): + # FIXME: assert that everyone has this dtype? 
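+        # NOTE: all the levels are built from the same symbolic operator, so
+        # the dtype of the first interpolation block is assumed to be
+        # representative of all the cluster matrices in the hierarchy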
+        return self.skeletons[0].R[0].dtype
+
+    @property
+    def nlevels(self):
+        return self.skeletons.size
+
+    @property
+    def nclusters(self):
+        return self.skeletons[0].nclusters
+
+    def __matmul__(self, x: ArrayOrContainerT) -> ArrayOrContainerT:
+        """Same as :meth:`matvec`."""
+        return self._matvec(x)
+
+    def _matmat(self, mat):
+        raise NotImplementedError
+
+    def _adjoint(self, x):
+        raise NotImplementedError
+
+# }}}
+
+
+# {{{ forward
+
+@dataclass(frozen=True)
+class ProxyHierarchicalForwardMatrix(ProxyHierarchicalMatrix):
+    def _matvec(self, x: ArrayOrContainerT) -> ArrayOrContainerT:
+        # NOTE: initialized here so that it is also defined for plain
+        # :class:`~numpy.ndarray` inputs, which carry no array context
+        actx = None
+
+        if isinstance(x, DOFArray):
+            from arraycontext import get_container_context_recursively_opt
+            actx = get_container_context_recursively_opt(x)
+            if actx is None:
+                raise ValueError("input array is frozen")
+
+            ary = actx.to_numpy(flatten(x, actx))
+        elif isinstance(x, np.ndarray) and x.dtype.char != "O":
+            ary = x
+        else:
+            raise TypeError(f"unsupported input type: {type(x)}")
+
+        assert actx is None or isinstance(actx, PyOpenCLArrayContext)
+        result = apply_skeleton_forward_matvec(self, ary)
+
+        if isinstance(x, DOFArray):
+            assert actx is not None
+            result = unflatten(x, actx.from_numpy(result), actx)
+
+        return result
+
+
+def apply_skeleton_forward_matvec(
+        hmat: ProxyHierarchicalMatrix,
+        ary: ArrayOrContainerT,
+        ) -> ArrayOrContainerT:
+    from pytential.linalg.cluster import split_array
+    targets, sources = hmat.skeletons[0].tgt_src_index
+    x = split_array(ary, sources)  # type: ignore[arg-type]
+
+    # NOTE: this computes a telescoping product of the form
+    #
+    #   A x_0 = (D0 + L0 (D1 + L1 (...) R1) R0) x_0
+    #
+    # with arbitrary numbers of levels. When recursing down, we compute
+    #
+    #   x_{k + 1} = R_k x_k
+    #   z_{k + 1} = D_k x_k
+    #
+    # and, at the root level, we have
+    #
+    #   x_{N + 1} = z_{N + 1} = D_N x_N.
+    #
+    # When recursing back up, we take `b_{N + 1} = x_{N + 1}` and
+    #
+    #   b_{k - 1} = z_k + L_k b_k
+    #
+    # which gives back the desired product when we reach the leaf level again.
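+    #
+    # As a concrete (two-level) illustration, the recursion above unrolls to
+    #
+    #   A x_0 ~ D_0 x_0 + L_0 (D_1 (R_0 x_0)),
+    #
+    # i.e. a downward pass forming x_1 = R_0 x_0, a multiplication by the
+    # dense root diagonal D_1, and an upward pass that applies L_0 and adds
+    # back the level-local contribution D_0 x_0.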
+
+    d_dot_x: np.ndarray = np.empty(hmat.nlevels, dtype=object)
+
+    # {{{ recurse down
+
+    from pytential.linalg.cluster import cluster
+    for k, clevel in enumerate(hmat.ctree.levels):
+        skeleton = hmat.skeletons[k]
+        assert x.shape == (skeleton.nclusters,)
+        assert skeleton.tgt_src_index.shape[1] == sum(xi.size for xi in x)
+
+        d_dot_x_k: np.ndarray = np.empty(skeleton.nclusters, dtype=object)
+        r_dot_x_k: np.ndarray = np.empty(skeleton.nclusters, dtype=object)
+
+        for i in range(skeleton.nclusters):
+            r_dot_x_k[i] = skeleton.R[i] @ x[i]
+            d_dot_x_k[i] = skeleton.D[i] @ x[i]
+
+        d_dot_x[k] = d_dot_x_k
+        x = cluster(r_dot_x_k, clevel)
+
+    # }}}
+
+    # {{{ root
+
+    # NOTE: at root level, we just multiply with the full diagonal
+    b = d_dot_x[hmat.nlevels - 1]
+    assert b.shape == (1,)
+
+    # }}}
+
+    # {{{ recurse up
+
+    from pytential.linalg.cluster import uncluster
+
+    for k, clevel in reversed(list(enumerate(hmat.ctree.levels[:-1]))):
+        skeleton = hmat.skeletons[k]
+        d_dot_x_k = d_dot_x[k]
+        assert d_dot_x_k.shape == (skeleton.nclusters,)
+
+        b = uncluster(b, skeleton.skel_tgt_src_index.targets, clevel)
+        for i in range(skeleton.nclusters):
+            b[i] = d_dot_x_k[i] + skeleton.L[i] @ b[i]
+
+    assert b.shape == (hmat.nclusters,)
+
+    # }}}
+
+    return np.concatenate(b)[np.argsort(targets.indices)]
+
+# }}}
+
+
+# {{{ backward
+
+@dataclass(frozen=True)
+class ProxyHierarchicalBackwardMatrix(ProxyHierarchicalMatrix):
+    def _matvec(self, x: ArrayOrContainerT) -> ArrayOrContainerT:
+        # NOTE: initialized here so that it is also defined for plain
+        # :class:`~numpy.ndarray` inputs, which carry no array context
+        actx = None
+
+        if isinstance(x, DOFArray):
+            from arraycontext import get_container_context_recursively_opt
+            actx = get_container_context_recursively_opt(x)
+            if actx is None:
+                raise ValueError("input array is frozen")
+
+            ary = actx.to_numpy(flatten(x, actx))
+        elif isinstance(x, np.ndarray) and x.dtype.char != "O":
+            ary = x
+        else:
+            raise TypeError(f"unsupported input type: {type(x)}")
+
+        assert actx is None or isinstance(actx, PyOpenCLArrayContext)
+        result = apply_skeleton_backward_matvec(actx, self, ary)
+
+        if isinstance(x, DOFArray):
+            assert actx is not None
+            result = unflatten(x, actx.from_numpy(result), actx)
+
+        return result
+
+
+def apply_skeleton_backward_matvec(
+        actx: PyOpenCLArrayContext | None,
+        hmat: ProxyHierarchicalMatrix,
+        ary: ArrayOrContainerT,
+        ) -> ArrayOrContainerT:
+    from pytential.linalg.cluster import split_array
+    targets, sources = hmat.skeletons[0].tgt_src_index
+
+    b = split_array(ary, targets)  # type: ignore[arg-type]
+    r_dot_b: np.ndarray = np.empty(hmat.nlevels, dtype=object)
+
+    # {{{ recurse down
+
+    # NOTE: this solves a telescoping product of the form
+    #
+    #   A x_0 = (D0 + L0 (D1 + L1 (...) R1) R0) x_0 = b_0
+    #
+    # with arbitrary numbers of levels. When recursing down, we compute
+    #
+    #   b_{k + 1} = \hat{D}_k R_k D_k^{-1} b_k
+    #   \hat{D}_k = (R_k D_k^{-1} L_k)^{-1}
+    #
+    # and, at the root level, we solve
+    #
+    #   D_N x_N = b_N.
+    #
+    # When recursing back up, we take `b_{N + 1} = x_{N + 1}` and
+    #
+    #   x_{k} = D_k^{-1} (b_k - L_k b_{k + 1} + L_k \hat{D}_k x_{k + 1})
+    #
+    # which gives back the desired solution when we reach the leaf level again.
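+    #
+    # As a concrete (two-level) illustration, solving
+    #
+    #   (D_0 + L_0 D_1 R_0) x_0 = b_0
+    #
+    # unrolls to forming b_1 = \hat{D}_0 R_0 D_0^{-1} b_0 on the way down,
+    # solving D_1 x_1 = b_1 at the root, and recovering
+    #
+    #   x_0 = D_0^{-1} (b_0 - L_0 (b_1 - \hat{D}_0 x_1))
+    #
+    # on the way back up.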
+
+    from pytential.linalg.cluster import cluster
+
+    for k, clevel in enumerate(hmat.ctree.levels):
+        skeleton = hmat.skeletons[k]
+        assert b.shape == (skeleton.nclusters,)
+        assert skeleton.tgt_src_index.shape[0] == sum(bi.size for bi in b)
+
+        dhat_dot_b_k: np.ndarray = np.empty(skeleton.nclusters, dtype=object)
+        for i in range(skeleton.nclusters):
+            dhat_dot_b_k[i] = (
+                skeleton.Dhat[i] @ (skeleton.R[i] @ (skeleton.invD[i] @ b[i]))
+                )
+
+        r_dot_b[k] = b
+        b = cluster(dhat_dot_b_k, clevel)
+
+    # }}}
+
+    # {{{ root
+
+    from pytools.obj_array import make_obj_array
+    assert b.shape == (1,)
+    x = make_obj_array([
+        la.solve(D, bi) for D, bi in zip(hmat.skeletons[-1].D, b, strict=True)
+        ])
+
+    # }}}
+
+    # {{{ recurse up
+
+    from pytential.linalg.cluster import uncluster
+
+    for k, clevel in reversed(list(enumerate(hmat.ctree.levels[:-1]))):
+        skeleton = hmat.skeletons[k]
+        b0 = r_dot_b[k]
+        b1 = r_dot_b[k + 1]
+        assert b0.shape == (skeleton.nclusters,)
+
+        x = uncluster(x, skeleton.skel_tgt_src_index.sources, clevel)
+        b1 = uncluster(b1, skeleton.skel_tgt_src_index.targets, clevel)
+
+        for i in range(skeleton.nclusters):
+            sx = b1[i] - skeleton.Dhat[i] @ x[i]
+            x[i] = skeleton.invD[i] @ (b0[i] - skeleton.L[i] @ sx)
+
+    assert x.shape == (hmat.nclusters,)
+
+    # }}}
+
+    return np.concatenate(x)[np.argsort(sources.indices)]
+
+# }}}
+
+
+# {{{ build_hmatrix_by_proxy
+
+def build_hmatrix_by_proxy(
+        actx: PyOpenCLArrayContext,
+        places: GeometryCollection,
+        exprs: sym.Expression | Sequence[sym.Expression],
+        input_exprs: sym.Variable | Sequence[sym.Variable], *,
+        auto_where: sym.DOFDescriptorLike | None = None,
+        domains: Sequence[sym.DOFDescriptorLike] | None = None,
+        context: dict[str, Any] | None = None,
+        id_eps: float = 1.0e-8,
+        rng: np.random.Generator | None = None,
+
+        # NOTE: these are dev variables and can disappear at any time!
+        _tree_kind: str | None = "adaptive-level-restricted",
+        _weighted_proxy: bool | tuple[bool, bool] | None = None,
+
+        # TODO: plug in an error model to get an estimate for:
+        #   * how many points we want per cluster?
+        #   * how many proxy points we want?
+        #   * how far away should the proxy points be?
+        # based on id_eps. How many of these should be user tunable?
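+        # (``hmatrix_error_from_param`` above is a first sketch of such an
+        # error model, but it is not used here to pick these defaults)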
+ _max_particles_in_box: int | None = None, + _approx_nproxy: int | None = None, + _proxy_radius_factor: float | None = None, + ) -> ProxyHierarchicalMatrixWrangler: + from pytential.symbolic.matrix import P2PClusterMatrixBuilder + from pytential.linalg.skeletonization import make_skeletonization_wrangler + + def P2PClusterMatrixBuilderWithDiagonal(*args, **kwargs): + kwargs["exclude_self"] = True + return P2PClusterMatrixBuilder(*args, **kwargs) + + wrangler = make_skeletonization_wrangler( + places, exprs, input_exprs, + domains=domains, context=context, auto_where=auto_where, + _weighted_proxy=_weighted_proxy, + # _neighbor_cluster_builder=P2PClusterMatrixBuilderWithDiagonal, + # _proxy_source_cluster_builder=P2PClusterMatrixBuilder, + # _proxy_target_cluster_builder=P2PClusterMatrixBuilder, + ) + + if wrangler.nrows != 1 or wrangler.ncols != 1: + raise ValueError("multi-block operators are not supported") + + from pytential.linalg.proxy import QBXProxyGenerator + proxy = QBXProxyGenerator(places, + approx_nproxy=_approx_nproxy, + radius_factor=_proxy_radius_factor) + + from pytential.linalg.cluster import partition_by_nodes + cluster_index, ctree = partition_by_nodes( + actx, places, + dofdesc=wrangler.domains[0], + tree_kind=_tree_kind, + max_particles_in_box=_max_particles_in_box) + + logger.info("tree levels: %d", ctree.nlevels) + logger.info("cluster count: %d", cluster_index.nclusters) + logger.info("root cluster sizes: %s", [ + # NOTE: making into a list so that they all get printed + int(s) for s in np.diff(cluster_index.starts) + ]) + + from pytential.linalg.utils import TargetAndSourceClusterList + tgt_src_index = TargetAndSourceClusterList( + targets=cluster_index, sources=cluster_index) + + from pytential.linalg.skeletonization import rec_skeletonize_by_proxy + skeletons = rec_skeletonize_by_proxy( + actx, places, ctree, tgt_src_index, exprs, input_exprs, + id_eps=id_eps, + rng=rng, + max_particles_in_box=_max_particles_in_box, + _proxy=proxy, + _wrangler=wrangler, + ) + + return ProxyHierarchicalMatrixWrangler( + wrangler=wrangler, proxy=proxy, ctree=ctree, skeletons=skeletons + ) + +# }}} + +# vim: foldmethod=marker diff --git a/pytential/linalg/skeletonization.py b/pytential/linalg/skeletonization.py index 583500c19..69dc10ab6 100644 --- a/pytential/linalg/skeletonization.py +++ b/pytential/linalg/skeletonization.py @@ -28,8 +28,10 @@ from typing import TYPE_CHECKING, Any import numpy as np +import numpy.linalg as la from arraycontext import PyOpenCLArrayContext, Array +from pytools import memoize_method from pytential import GeometryCollection, sym from pytential.symbolic.matrix import ClusterMatrixBuilderBase @@ -425,7 +427,7 @@ def evaluate_target_proxy_interaction( def make_skeletonization_wrangler( places: GeometryCollection, - exprs: sym.var | Sequence[sym.var], + exprs: sym.Expression | Sequence[sym.Expression], input_exprs: sym.var | Sequence[sym.var], *, domains: Sequence[Hashable] | None = None, context: dict[str, Any] | None = None, @@ -433,6 +435,7 @@ def make_skeletonization_wrangler( # internal _weighted_proxy: bool | tuple[bool, bool] | None = None, + _remove_source_transforms: bool = False, _proxy_source_cluster_builder: type[ClusterMatrixBuilderBase] | None = None, _proxy_target_cluster_builder: type[ClusterMatrixBuilderBase] | None = None, _neighbor_cluster_builder: type[ClusterMatrixBuilderBase] | None = None, @@ -460,9 +463,13 @@ def make_skeletonization_wrangler( prepared_lpot_exprs = prepare_expr(places, lpot_exprs, auto_where) source_proxy_exprs = 
prepare_proxy_expr( - places, prepared_lpot_exprs, (auto_where[0], PROXY_SKELETONIZATION_TARGET)) + places, prepared_lpot_exprs, (auto_where[0], PROXY_SKELETONIZATION_TARGET), + remove_transforms=_remove_source_transforms) target_proxy_exprs = prepare_proxy_expr( - places, prepared_lpot_exprs, (PROXY_SKELETONIZATION_SOURCE, auto_where[1])) + places, prepared_lpot_exprs, (PROXY_SKELETONIZATION_SOURCE, auto_where[1]), + # NOTE: transforms are unconditionally removed here because the + # source would be the proxies, where we do not have normals, etc. + remove_transforms=True) # }}} @@ -472,7 +479,7 @@ def make_skeletonization_wrangler( weighted_sources = weighted_targets = True elif isinstance(_weighted_proxy, bool): weighted_sources = weighted_targets = _weighted_proxy - elif isinstance(_weighted_proxy, tuple): + elif isinstance(_weighted_proxy, tuple) and len(_weighted_proxy) == 2: weighted_sources, weighted_targets = _weighted_proxy else: raise ValueError(f"unknown value for weighting: '{_weighted_proxy}'") @@ -678,9 +685,9 @@ def _skeletonize_block_by_proxy_with_mats( if __debug__: isfinite = np.isfinite(tgt_mat) - assert np.all(isfinite), np.where(isfinite) + assert np.all(isfinite), np.where(~isfinite) isfinite = np.isfinite(src_mat) - assert np.all(isfinite), np.where(isfinite) + assert np.all(isfinite), np.where(~isfinite) # skeletonize target points k, idx, interp = interp_decomp(tgt_mat.T, rank=k, eps=id_eps, rng=rng) @@ -830,13 +837,28 @@ def __post_init__(self): def nclusters(self): return self.tgt_src_index.nclusters + @property + @memoize_method + def invD(self) -> np.ndarray: + from pytools.obj_array import make_obj_array + return make_obj_array([la.inv(D) for D in self.D]) + + @property + @memoize_method + def Dhat(self) -> np.ndarray: + from pytools.obj_array import make_obj_array + return make_obj_array([ + la.inv(self.R[i] @ self.invD[i] @ self.L[i]) + for i in range(self.nclusters) + ]) + def skeletonize_by_proxy( actx: PyOpenCLArrayContext, places: GeometryCollection, tgt_src_index: TargetAndSourceClusterList, - exprs: sym.var | Sequence[sym.var], + exprs: sym.Expression | Sequence[sym.Expression], input_exprs: sym.var | Sequence[sym.var], *, domains: Sequence[Hashable] | None = None, context: dict[str, Any] | None = None, @@ -905,7 +927,7 @@ def rec_skeletonize_by_proxy( ctree: ClusterTree, tgt_src_index: TargetAndSourceClusterList, - exprs: sym.var | Sequence[sym.var], + exprs: sym.Expression | Sequence[sym.Expression], input_exprs: sym.var | Sequence[sym.var], *, domains: Sequence[Hashable] | None = None, context: dict[str, Any] | None = None, @@ -961,7 +983,7 @@ def rec_skeletonize_by_proxy( rng=rng, max_particles_in_box=max_particles_in_box) - skel_per_level[i] = skeleton # type: ignore[call-overload] + skel_per_level[i] = skeleton tgt_src_index = cluster(skeleton.skel_tgt_src_index, clevel) assert tgt_src_index.nclusters == 1 @@ -969,7 +991,7 @@ def rec_skeletonize_by_proxy( # evaluate the full root cluster (no skeletonization or anything) skeleton = _evaluate_root(actx, 0, 0, places, wrangler, tgt_src_index) - skel_per_level[-1] = skeleton # type: ignore[call-overload] + skel_per_level[-1] = skeleton return skel_per_level diff --git a/pytential/linalg/utils.py b/pytential/linalg/utils.py index ee64b1847..8f10ecd0a 100644 --- a/pytential/linalg/utils.py +++ b/pytential/linalg/utils.py @@ -456,6 +456,22 @@ def mnorm(x: np.ndarray, y: np.ndarray) -> np.floating[Any]: return tgt_error, src_error +def skeletonization_matrix( + mat: np.ndarray, skeleton: 
SkeletonizationResult, + ) -> tuple[np.ndarray, np.ndarray]: + D: np.ndarray = np.empty(skeleton.nclusters, dtype=object) + S: np.ndarray = np.empty((skeleton.nclusters, skeleton.nclusters), dtype=object) + + from itertools import product + for i, j in product(range(skeleton.nclusters), repeat=2): + if i == j: + D[i] = skeleton.tgt_src_index.cluster_take(mat, i, i) + else: + S[i, j] = skeleton.skel_tgt_src_index.cluster_take(mat, i, j) + + return D, S + + def skeletonization_error( mat: np.ndarray, skeleton: SkeletonizationResult, *, ord: float | None = None, @@ -517,4 +533,51 @@ def skeletonization_error( # }}} + +# {{{ eigenvalues + +def eigs( + mat, *, + k: int = 6, + which: str = "LM", + maxiter: int | None = None, + tol: float = 0.0) -> np.ndarray: + import scipy.sparse.linalg as ssla + + result = ssla.eigs(mat, + k=k, + which=which, + maxiter=maxiter, + tol=tol, + return_eigenvectors=False) + + imag_norm = np.linalg.norm(np.imag(result), ord=np.inf) + if imag_norm > 1.0e-14: + from warnings import warn + warn(f"eigenvalues are not real enough: norm(imag) = {imag_norm:.12e}", + stacklevel=2) + + return result + + +def cond(mat, *, + mat_inv=None, + p: float | None = None, + tol: float = 1.0e-6) -> float: + if p is None: + p = 2 + + if p != 2: + raise ValueError(f"unsupported norm order: '{p}'") + + lambda_max = eigs(mat, k=1, which="LM", tol=tol) + if mat_inv is None: + lambda_min = eigs(mat, k=1, which="SM", tol=tol) + else: + lambda_min = eigs(mat_inv, k=1, which="LM", tol=tol) + + return np.abs(lambda_max) / np.abs(lambda_min) + +# }}} + # vim: foldmethod=marker diff --git a/pytential/symbolic/matrix.py b/pytential/symbolic/matrix.py index 6ec13c496..1bab396e9 100644 --- a/pytential/symbolic/matrix.py +++ b/pytential/symbolic/matrix.py @@ -547,7 +547,6 @@ def map_int_g(self, expr): expr.target.geometry, expr.target.discr_stage) actx = self.array_context - target_base_kernel = expr.target_kernel.get_base_kernel() result = 0 for density, kernel in zip(expr.densities, expr.source_kernels, strict=True): @@ -561,12 +560,10 @@ def map_int_g(self, expr): # {{{ generator - base_kernel = kernel.get_base_kernel() - from sumpy.p2p import P2PMatrixGenerator mat_gen = P2PMatrixGenerator(actx.context, - source_kernels=(base_kernel,), - target_kernels=(target_base_kernel,), + source_kernels=(kernel,), + target_kernels=(expr.target_kernel,), exclude_self=self.exclude_self) # }}} @@ -576,7 +573,7 @@ def map_int_g(self, expr): # {{{ kernel args # NOTE: copied from pytential.symbolic.primitives.IntG - kernel_args = base_kernel.get_args() + base_kernel.get_source_args() + kernel_args = kernel.get_args() + kernel.get_source_args() kernel_args = {arg.loopy_arg.name for arg in kernel_args} kernel_args = _get_layer_potential_args( @@ -761,7 +758,6 @@ def map_int_g(self, expr): expr.target.geometry, expr.target.discr_stage) actx = self.array_context - target_base_kernel = expr.target_kernel.get_base_kernel() result = 0 for kernel, density in zip(expr.source_kernels, expr.densities, strict=True): @@ -782,12 +778,10 @@ def map_int_g(self, expr): # {{{ generator - base_kernel = kernel.get_base_kernel() - from sumpy.p2p import P2PMatrixSubsetGenerator mat_gen = P2PMatrixSubsetGenerator(actx.context, - source_kernels=(base_kernel,), - target_kernels=(target_base_kernel,), + source_kernels=(kernel,), + target_kernels=(expr.target_kernel,), exclude_self=self.exclude_self) # }}} @@ -797,7 +791,7 @@ def map_int_g(self, expr): # {{{ kernel args # NOTE: copied from pytential.symbolic.primitives.IntG - kernel_args = 
base_kernel.get_args() + base_kernel.get_source_args() + kernel_args = kernel.get_args() + kernel.get_source_args() kernel_args = {arg.loopy_arg.name for arg in kernel_args} kernel_args = _get_layer_potential_args( diff --git a/test/extra_matrix_data.py b/test/extra_matrix_data.py index d4ab820a6..339d40cd8 100644 --- a/test/extra_matrix_data.py +++ b/test/extra_matrix_data.py @@ -44,14 +44,23 @@ class MatrixTestCaseMixin: proxy_target_cluster_builder: Callable[..., Any] | None = None neighbor_cluster_builder: Callable[..., Any] | None = None - def get_cluster_index(self, actx, places, dofdesc=None): + def max_particles_in_box_for_discr(self, discr): + max_particles_in_box = self.max_particles_in_box + if max_particles_in_box is None: + max_particles_in_box = discr.ndofs // self.approx_cluster_count + + return max_particles_in_box + + def get_cluster_index( + self, actx, places, dofdesc=None, max_particles_in_box=None): if dofdesc is None: dofdesc = places.auto_source discr = places.get_discretization(dofdesc.geometry) - max_particles_in_box = self.max_particles_in_box if max_particles_in_box is None: - max_particles_in_box = discr.ndofs // self.approx_cluster_count + max_particles_in_box = self.max_particles_in_box + if max_particles_in_box is None: + max_particles_in_box = discr.ndofs // self.approx_cluster_count from pytential.linalg.cluster import partition_by_nodes cindex, ctree = partition_by_nodes(actx, places, @@ -78,9 +87,11 @@ def get_cluster_index(self, actx, places, dofdesc=None): return cindex, ctree - def get_tgt_src_cluster_index(self, actx, places, dofdesc=None): + def get_tgt_src_cluster_index( + self, actx, places, dofdesc=None, max_particles_in_box=None): from pytential.linalg import TargetAndSourceClusterList - cindex, ctree = self.get_cluster_index(actx, places, dofdesc=dofdesc) + cindex, ctree = self.get_cluster_index( + actx, places, dofdesc=dofdesc, max_particles_in_box=max_particles_in_box) return TargetAndSourceClusterList(cindex, cindex), ctree def get_operator(self, ambient_dim, qbx_forced_limit=_NoArgSentinel): diff --git a/test/test_linalg_hmatrix.py b/test/test_linalg_hmatrix.py new file mode 100644 index 000000000..649a3d6db --- /dev/null +++ b/test/test_linalg_hmatrix.py @@ -0,0 +1,590 @@ +__copyright__ = "Copyright (C) 2022 Alexandru Fikl" + +__license__ = """ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+""" + +from dataclasses import replace +import pytest + +import numpy as np + +from pytential import bind, sym +from pytential import GeometryCollection + +from meshmode.mesh.generation import NArmedStarfish + +from meshmode import _acf # noqa: F401 +from arraycontext import pytest_generate_tests_for_array_contexts +from meshmode.array_context import PytestPyOpenCLArrayContextFactory + +import extra_matrix_data as extra +import logging +logger = logging.getLogger(__name__) + +pytest_generate_tests = pytest_generate_tests_for_array_contexts([ + PytestPyOpenCLArrayContextFactory, + ]) + + +HMATRIX_TEST_CASES = [ + extra.CurveTestCase( + name="starfish", + op_type="scalar", + target_order=4, + curve_fn=NArmedStarfish(5, 0.25), + resolutions=[512]), + extra.CurveTestCase( + name="starfish", + op_type="double", + target_order=4, + curve_fn=NArmedStarfish(5, 0.25), + resolutions=[512]), + extra.TorusTestCase( + target_order=4, + op_type="scalar", + resolutions=[0]) + ] + + +# {{{ test_hmatrix_forward_matvec_single_level + +def hmatrix_matvec_single_level(mat, x, skeleton): + from pytential.linalg.cluster import split_array + targets, sources = skeleton.tgt_src_index + y = split_array(x, sources) + + y_hat = np.empty(y.shape, dtype=object) + + for i in range(skeleton.nclusters): + y_hat[i] = skeleton.R[i] @ y[i] + + from pytential.linalg.utils import skeletonization_matrix + D, S = skeletonization_matrix(mat, skeleton) + syhat = np.zeros(y.shape, dtype=object) + + from itertools import product + for i, j in product(range(skeleton.nclusters), repeat=2): + if i == j: + continue + + syhat[i] = syhat[i] + S[i, j] @ y_hat[j] + + for i in range(skeleton.nclusters): + y[i] = D[i] @ y[i] + skeleton.L[i] @ syhat[i] + + return np.concatenate(y)[np.argsort(targets.indices)] + + +@pytest.mark.parametrize("case", HMATRIX_TEST_CASES) +@pytest.mark.parametrize("discr_stage", [sym.QBX_SOURCE_STAGE1]) +def test_hmatrix_forward_matvec_single_level( + actx_factory, case, discr_stage, visualize=False): + actx = actx_factory() + rng = np.random.default_rng(42) + + if visualize: + logging.basicConfig(level=logging.INFO) + + if case.ambient_dim == 2: + kwargs = {"proxy_approx_count": 64, "proxy_radius_factor": 1.15} + else: + kwargs = {"proxy_approx_count": 256, "proxy_radius_factor": 1.25} + + case = replace(case, skel_discr_stage=discr_stage, **kwargs) + logger.info("\n%s", case) + + # {{{ geometry + + dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage) + qbx = case.get_layer_potential(actx, case.resolutions[-1], case.target_order) + places = GeometryCollection(qbx, auto_where=dd) + + density_discr = places.get_discretization(dd.geometry, dd.discr_stage) + tgt_src_index, _ = case.get_tgt_src_cluster_index(actx, places, dd) + + logger.info("dd %s", dd) + logger.info("nclusters %3d ndofs %7d", + tgt_src_index.nclusters, density_discr.ndofs) + + # }}} + + # {{{ construct reference + + from pytential.linalg.direct_solver_symbolic import prepare_expr + from pytential.symbolic.matrix import MatrixBuilder + sym_u, sym_op = case.get_operator(places.ambient_dim) + mat = MatrixBuilder( + actx, + dep_expr=sym_u, + other_dep_exprs=[], + dep_discr=density_discr, + places=places, + context={}, + )(prepare_expr(places, sym_op, (dd, dd))) + + from arraycontext import flatten, unflatten + x = actx.thaw(density_discr.nodes()[0]) + y = actx.to_numpy(flatten(x, actx)) + r_lpot = unflatten(x, actx.from_numpy(mat @ y), actx) + + # }}} + + # {{{ check matvec + + id_eps = 10.0 ** (-np.arange(2, 16)) + rec_error = 
+    id_eps = 10.0 ** (-np.arange(2, 16))
+    rec_error = np.zeros_like(id_eps)
+
+    from pytools.convergence import EOCRecorder
+    eoc = EOCRecorder()
+
+    from pytential.linalg.skeletonization import skeletonize_by_proxy
+    for i in range(id_eps.size):
+        skeleton = skeletonize_by_proxy(
+            actx, places, tgt_src_index, sym_op, sym_u,
+            domains=[dd], context={},
+            approx_nproxy=case.proxy_approx_count,
+            proxy_radius_factor=case.proxy_radius_factor,
+            id_eps=id_eps[i],
+            rng=rng,
+            )
+        r_hmat = hmatrix_matvec_single_level(mat, y, skeleton[0, 0])
+        r_hmat = unflatten(x, actx.from_numpy(r_hmat), actx)
+
+        from meshmode.dof_array import flat_norm
+        rec_error[i] = actx.to_numpy(
+            flat_norm(r_hmat - r_lpot) / flat_norm(r_lpot)
+            )
+        logger.info("id_eps %.2e error: %.12e", id_eps[i], rec_error[i])
+        # assert rec_error[i] < 0.1
+
+        eoc.add_data_point(id_eps[i], rec_error[i])
+
+    logger.info("\n%s", eoc.pretty_print(
+        abscissa_format="%.8e",
+        error_format="%.8e",
+        eoc_format="%.2f"))
+
+    # }}}
+
+    if not visualize:
+        return
+
+    import matplotlib.pyplot as pt
+    fig = pt.figure(figsize=(10, 10), dpi=300)
+    ax = fig.gca()
+
+    ax.loglog(id_eps, id_eps, "k--")
+    ax.loglog(id_eps, rec_error)
+
+    ax.grid(True)
+    ax.set_xlabel(r"$\epsilon_{id}$")
+    ax.set_ylabel("$Error$")
+    ax.set_title(case.name)
+
+    basename = "linalg_hmatrix_single_matvec"
+    fig.savefig(f"{basename}_{case.name}_{case.op_type}_convergence")
+
+    if case.ambient_dim == 2:
+        fig.clf()
+        ax = fig.gca()
+
+        from arraycontext import flatten
+        r_hmap = actx.to_numpy(flatten(r_hmat, actx))
+        r_lpot = actx.to_numpy(flatten(r_lpot, actx))
+
+        ax.semilogy(r_hmap - r_lpot)
+        ax.set_ylim([1.0e-16, 1.0])
+        fig.savefig(f"{basename}_{case.name}_{case.op_type}_error")
+
+    pt.close(fig)
+
+# }}}
+
+
+# {{{ test_hmatrix_forward_matvec
+
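+# NOTE: unlike the single-level check above, this test goes through the full
+# `build_hmatrix_by_proxy` driver and compares the resulting multi-level
+# matvec to a direct evaluation (QBX by default, dense p2p when p2p=True).
+# The measured error is also compared to the a priori estimate given by
+# `hmatrix_error_from_param`.
+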
+@pytest.mark.parametrize("case", [
+    HMATRIX_TEST_CASES[0],
+    HMATRIX_TEST_CASES[1],
+    pytest.param(HMATRIX_TEST_CASES[2], marks=pytest.mark.slowtest),
+    ])
+@pytest.mark.parametrize("discr_stage", [
+    sym.QBX_SOURCE_STAGE1,
+    # sym.QBX_SOURCE_STAGE2
+    ])
+def test_hmatrix_forward_matvec(
+        actx_factory, case, discr_stage, p2p=False, visualize=False):
+    actx = actx_factory()
+    rng = np.random.default_rng(42)
+
+    if visualize:
+        logging.basicConfig(level=logging.INFO)
+
+    if case.ambient_dim == 2:
+        kwargs = {"proxy_approx_count": 64, "proxy_radius_factor": 1.25}
+    else:
+        kwargs = {"proxy_approx_count": 256, "proxy_radius_factor": 1.25}
+
+    case = replace(case, skel_discr_stage=discr_stage, **kwargs)
+    logger.info("\n%s", case)
+
+    # {{{ geometry
+
+    dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage)
+    qbx = case.get_layer_potential(actx, case.resolutions[-1], case.target_order)
+    places = GeometryCollection(qbx, auto_where=dd)
+
+    density_discr = places.get_discretization(dd.geometry, dd.discr_stage)
+    max_particles_in_box = case.max_particles_in_box_for_discr(density_discr)
+
+    tgt_src_index, _ = case.get_tgt_src_cluster_index(
+        actx, places, dd, max_particles_in_box=max_particles_in_box)
+
+    logger.info("dd %s", dd)
+    logger.info("nclusters %3d ndofs %7d",
+        tgt_src_index.nclusters, density_discr.ndofs)
+
+    # }}}
+
+    # {{{ construct hmatrix
+
+    from pytential.linalg.hmatrix import build_hmatrix_by_proxy
+    sym_u, sym_op = case.get_operator(places.ambient_dim)
+
+    x = actx.thaw(density_discr.nodes()[0])
+
+    if p2p:
+        # NOTE: this also needs to be changed in `build_hmatrix_by_proxy`
+        # to actually evaluate the p2p interactions instead of qbx
+        from pytential.linalg.direct_solver_symbolic import prepare_expr
+        from pytential.symbolic.matrix import P2PMatrixBuilder
+        mat = P2PMatrixBuilder(
+            actx,
+            dep_expr=sym_u,
+            other_dep_exprs=[],
+            dep_discr=density_discr,
+            places=places,
+            context={},
+            )(prepare_expr(places, sym_op, (dd, dd)))
+
+        from arraycontext import flatten, unflatten
+        y = actx.to_numpy(flatten(x, actx))
+        r_lpot = unflatten(x, actx.from_numpy(mat @ y), actx)
+    else:
+        r_lpot = bind(places, sym_op, auto_where=dd)(actx, u=x)
+
+    from pytential.linalg.hmatrix import hmatrix_error_from_param
+    id_eps = 10.0 ** (-np.arange(2, 16))
+    rec_error = np.zeros_like(id_eps)
+    model_error = np.zeros_like(id_eps)
+
+    from pytools.convergence import EOCRecorder
+    eoc = EOCRecorder()
+
+    for i in range(id_eps.size):
+        wrangler = build_hmatrix_by_proxy(
+            actx, places, sym_op, sym_u,
+            domains=[dd],
+            context=case.knl_concrete_kwargs,
+            id_eps=id_eps[i],
+            rng=rng,
+            _tree_kind=case.tree_kind,
+            _max_particles_in_box=max_particles_in_box,
+            _approx_nproxy=case.proxy_approx_count,
+            _proxy_radius_factor=case.proxy_radius_factor,
+            )
+        hmat = wrangler.get_forward()
+
+        # {{{ skeletonization error
+
+        from meshmode.dof_array import flat_norm
+        r_hmap = hmat @ x
+        rec_error[i] = actx.to_numpy(
+            flat_norm(r_hmap - r_lpot) / flat_norm(r_lpot)
+            )
+
+        # }}}
+
+        # {{{ model error
+
+        skeleton = hmat.skeletons[0]
+        icluster = np.argmax(np.diff(skeleton.skel_tgt_src_index.targets.starts))
+
+        proxy_radius = actx.to_numpy(
+            skeleton._src_eval_result.pxy.radii[icluster]
+            )
+        cluster_radius = actx.to_numpy(
+            skeleton._src_eval_result.pxy.cluster_radii[icluster]
+            )
+
+        model_error[i] = hmatrix_error_from_param(
+            places.ambient_dim,
+            id_eps=id_eps[i],
+            min_proxy_radius=proxy_radius,
+            max_cluster_radius=cluster_radius,
+            id_rank=skeleton.skel_tgt_src_index.targets.cluster_size(icluster),
+            nproxies=skeleton._src_eval_result.pxy.pxyindex.cluster_size(icluster),
+            ntargets=skeleton.tgt_src_index.targets.cluster_size(icluster),
+            nsources=skeleton.tgt_src_index.sources.cluster_size(icluster),
+            c=1.0e-8
+            )
+
+        # }}}
+
+        logger.info("id_eps %.2e error: %.12e (%.12e)",
+            id_eps[i], rec_error[i], model_error[i])
+        eoc.add_data_point(id_eps[i], rec_error[i])
+
+    logger.info("\n%s", eoc.pretty_print(
+        abscissa_format="%.8e",
+        error_format="%.8e",
+        eoc_format="%.2f"))
+
+    if not visualize:
+        assert eoc.order_estimate() > 0.6
+
+    # }}}
+
+    if not visualize:
+        return
+
+    import matplotlib.pyplot as pt
+    fig = pt.figure(figsize=(10, 10), dpi=300)
+    ax = fig.gca()
+
+    ax.loglog(id_eps, id_eps, "k--")
+    ax.loglog(id_eps, rec_error)
+    ax.loglog(id_eps, model_error)
+
+    ax.grid(True)
+    ax.set_xlabel(r"$\epsilon_{id}$")
+    ax.set_ylabel("$Error$")
+    ax.set_title(case.name)
+
+    lpot_name = "p2p" if p2p else "qbx"
+    basename = f"linalg_hmatrix_{lpot_name}_matvec"
+    fig.savefig(f"{basename}_{case.name}_{case.op_type}_convergence")
+
+    if case.ambient_dim == 2:
+        fig.clf()
+        ax = fig.gca()
+
+        from arraycontext import flatten
+        r_hmap = actx.to_numpy(flatten(r_hmap, actx))
+        r_lpot = actx.to_numpy(flatten(r_lpot, actx))
+
+        ax.semilogy(r_hmap - r_lpot)
+        ax.set_ylim([1.0e-16, 1.0])
+        fig.savefig(f"{basename}_{case.name}_{case.op_type}_error")
+
+    pt.close(fig)
+
+# }}}
+
+
+# {{{ test_hmatrix_backward_matvec
+
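+# NOTE: this checks the approximate inverse: for a known density x_ref, the
+# rhs b_ref = op(x_ref) is evaluated directly, the backward H-matrix is
+# applied to it, and x_ref should be recovered as id_eps is tightened.
+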
+@pytest.mark.parametrize("case", [
+    HMATRIX_TEST_CASES[0],
+    HMATRIX_TEST_CASES[1],
+    pytest.param(HMATRIX_TEST_CASES[2], marks=pytest.mark.slowtest),
+    ])
+@pytest.mark.parametrize("discr_stage", [
+    sym.QBX_SOURCE_STAGE1,
+    # sym.QBX_SOURCE_STAGE2
+    ])
+def test_hmatrix_backward_matvec(actx_factory, case, discr_stage, visualize=False):
+    actx = actx_factory()
+    rng = np.random.default_rng(42)
+
+    if visualize:
+        logging.basicConfig(level=logging.INFO)
+
+    # NOTE: the same proxy parameters work in both 2d and 3d here
+    kwargs = {"proxy_approx_count": 64, "proxy_radius_factor": 1.25}
+
+    case = replace(case, skel_discr_stage=discr_stage, **kwargs)
+    logger.info("\n%s", case)
+
+    # {{{ geometry
+
+    dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage)
+    qbx = case.get_layer_potential(actx, case.resolutions[-1], case.target_order)
+    places = GeometryCollection(qbx, auto_where=dd)
+
+    density_discr = places.get_discretization(dd.geometry, dd.discr_stage)
+    max_particles_in_box = case.max_particles_in_box_for_discr(density_discr)
+
+    tgt_src_index, _ = case.get_tgt_src_cluster_index(
+        actx, places, dd, max_particles_in_box=max_particles_in_box)
+
+    logger.info("dd %s", dd)
+    logger.info("nclusters %3d ndofs %7d",
+        tgt_src_index.nclusters, density_discr.ndofs)
+
+    # }}}
+
+    # {{{ construct dense reference
+
+    sym_u, sym_op = case.get_operator(places.ambient_dim)
+
+    if visualize:
+        from pytential.linalg.direct_solver_symbolic import prepare_expr
+        from pytential.symbolic.matrix import MatrixBuilder
+        mat = MatrixBuilder(
+            actx,
+            dep_expr=sym_u,
+            other_dep_exprs=[],
+            dep_discr=density_discr,
+            places=places,
+            context={},
+            )(prepare_expr(places, sym_op, (dd, dd)))
+
+        import pytential.linalg.utils as hla
+        eigs_ref = hla.eigs(mat, k=5)
+        kappa_ref = np.linalg.cond(mat, p=2)
+
+    # }}}
+
+    # {{{ construct hmatrix
+
+    from pytential.linalg.hmatrix import build_hmatrix_by_proxy
+
+    x_ref = actx.thaw(density_discr.nodes()[0])
+    b_ref = bind(places, sym_op, auto_where=dd)(actx, u=x_ref)
+
+    id_eps = 10.0 ** (-np.arange(2, 16))
+    rec_error = np.zeros_like(id_eps)
+
+    if visualize:
+        rec_eigs = np.zeros((id_eps.size, eigs_ref.size), dtype=np.complex128)
+        rec_kappa = np.zeros(id_eps.size)
+
+    from pytools.convergence import EOCRecorder
+    eoc = EOCRecorder()
+
+    for i in range(id_eps.size):
+        wrangler = build_hmatrix_by_proxy(
+            actx, places, sym_op, sym_u,
+            domains=[dd],
+            context=case.knl_concrete_kwargs,
+            id_eps=id_eps[i],
+            rng=rng,
+            _tree_kind=case.tree_kind,
+            _max_particles_in_box=max_particles_in_box,
+            _approx_nproxy=case.proxy_approx_count,
+            _proxy_radius_factor=case.proxy_radius_factor,
+            )
+
+        hmat_inv = wrangler.get_backward()
+        x_hmat = hmat_inv @ b_ref
+
+        if visualize:
+            hmat = wrangler.get_forward()
+            rec_eigs[i, :] = hla.eigs(hmat, k=5, tol=1.0e-6)
+            rec_kappa[i] = hla.cond(hmat, p=2, tol=1.0e-6)
+
+            logger.info("eigs: %s %s", eigs_ref, rec_eigs[i])
+            logger.info("kappa %.12e %.12e", kappa_ref, rec_kappa[i])
+
+        from meshmode.dof_array import flat_norm
+        rec_error[i] = actx.to_numpy(
+            flat_norm(x_hmat - x_ref) / flat_norm(x_ref)
+            )
+        logger.info("id_eps %.2e error: %.12e", id_eps[i], rec_error[i])
+        eoc.add_data_point(id_eps[i], rec_error[i])
+
+    logger.info("\n%s", eoc.pretty_print(
+        abscissa_format="%.8e",
+        error_format="%.8e",
+        eoc_format="%.2f"))
+
+    if not visualize:
+        assert eoc.order_estimate() > 0.6
+
+    # }}}
+
+    if not visualize:
+        return
+
+    import matplotlib.pyplot as pt
+    fig = pt.figure(figsize=(10, 10), dpi=300)
+
+    # {{{ convergence
+
+    ax = fig.gca()
+    ax.loglog(id_eps, id_eps, "k--")
+    ax.loglog(id_eps, rec_error)
+
+    ax.grid(True)
+    ax.set_xlabel(r"$\epsilon_{id}$")
+    ax.set_ylabel("$Error$")
+    ax.set_title(case.name)
+
+    fig.savefig(f"linalg_hmatrix_inverse_{case.name}_{case.op_type}_convergence")
+    fig.clf()
+
+    # }}}
+
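+    # (visualize only) compare the spectrum of the compressed operator to
+    # that of the dense reference computed above
+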
+    # {{{ eigs
+
+    ax = fig.gca()
+    ax.plot(np.real(eigs_ref), np.imag(eigs_ref), "ko")
+    for i in range(id_eps.size):
+        ax.plot(np.real(rec_eigs[i]), np.imag(rec_eigs[i]), "v")
+
+    ax.grid(True)
+    ax.set_xlabel(r"$\Re \lambda$")
+    ax.set_ylabel(r"$\Im \lambda$")
+
+    fig.savefig(f"linalg_hmatrix_inverse_{case.name}_{case.op_type}_eigs")
+    fig.clf()
+
+    # }}}
+
+    if case.ambient_dim == 2:
+        ax = fig.gca()
+
+        from arraycontext import flatten
+        x_hmat = actx.to_numpy(flatten(x_hmat, actx))
+        x_ref = actx.to_numpy(flatten(x_ref, actx))
+
+        ax.semilogy(x_hmat - x_ref)
+        ax.set_ylim([1.0e-16, 1.0])
+        fig.savefig(f"linalg_hmatrix_inverse_{case.name}_{case.op_type}_error")
+        fig.clf()
+
+    pt.close(fig)
+
+# }}}
+
+
+if __name__ == "__main__":
+    import sys
+    if len(sys.argv) > 1:
+        exec(sys.argv[1])
+    else:
+        from pytest import main
+        main([__file__])
+
+# vim: fdm=marker
diff --git a/test/test_linalg_skeletonization.py b/test/test_linalg_skeletonization.py
index 89e0eb091..6f33df48b 100644
--- a/test/test_linalg_skeletonization.py
+++ b/test/test_linalg_skeletonization.py
@@ -140,6 +140,113 @@ def test_skeletonize_symbolic(actx_factory, case, visualize=False):
 # }}}
 
 
+# {{{ test_skeletonize_diagonal
+
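+# NOTE: rec_skeletonize_by_proxy computes the diagonal blocks D at each level
+# from the full cluster self-interaction, while `_update_skeleton_diagonal`
+# subtracts the (clustered) skeleton self-interaction of the previous level,
+# so that self-interactions are not double-counted across levels. This test
+# rebuilds that correction from dense evaluations and compares it to D.
+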
+@pytest.mark.parametrize("case", [
+    SKELETONIZE_TEST_CASES[0],
+    SKELETONIZE_TEST_CASES[1],
+    SKELETONIZE_TEST_CASES[2],
+    ])
+def test_skeletonize_diagonal(actx_factory, case, visualize=False):
+    import scipy.linalg.interpolative as sli  # pylint:disable=no-name-in-module
+    sli.seed(42)
+
+    actx = actx_factory()
+    rng = np.random.default_rng(42)
+
+    if visualize:
+        logging.basicConfig(level=logging.INFO)
+
+    # {{{ setup
+
+    dd = sym.DOFDescriptor(case.name, discr_stage=case.skel_discr_stage)
+    resolution = case.resolutions[-1]
+
+    qbx = case.get_layer_potential(actx, resolution, case.target_order)
+    places = GeometryCollection(qbx, auto_where=dd)
+
+    tgt_src_index, ctree = case.get_tgt_src_cluster_index(actx, places, dd)
+
+    sym_u, sym_op = case.get_operator(places.ambient_dim)
+
+    # }}}
+
+    # {{{ check
+
+    from pytential.linalg.skeletonization import make_skeletonization_wrangler
+    wrangler = make_skeletonization_wrangler(
+        places, sym_op, sym_u,
+        auto_where=dd, context=case.knl_concrete_kwargs)
+
+    from pytential.linalg.skeletonization import rec_skeletonize_by_proxy
+    skeletons = rec_skeletonize_by_proxy(
+        actx, places, ctree, tgt_src_index, sym_op, sym_u,
+        auto_where=dd,
+        context=case.knl_concrete_kwargs,
+        approx_nproxy=case.proxy_approx_count,
+        proxy_radius_factor=case.proxy_radius_factor,
+        id_eps=case.id_eps,
+        rng=rng,
+        _wrangler=wrangler,
+        )
+
+    from pytential.linalg.hmatrix import _update_skeleton_diagonal
+    for i in range(1, skeletons.size):
+        skeletons[i] = _update_skeleton_diagonal(
+            skeletons[i], skeletons[i - 1], ctree.levels[i - 1],
+            )
+
+    from pytential.linalg.cluster import cluster
+    from pytential.linalg.utils import make_flat_cluster_diag
+
+    parent = None
+    for k, clevel in enumerate(ctree.levels):
+        tgt_src_index = skeletons[k].tgt_src_index
+        D1 = wrangler.evaluate_self(actx, places, tgt_src_index, 0, 0)
+        D1 = make_flat_cluster_diag(D1, tgt_src_index)
+
+        if k == 0:
+            D = D0 = D1
+        else:
+            skel_tgt_src_index = skeletons[k - 1].skel_tgt_src_index
+            assert skel_tgt_src_index.shape == tgt_src_index.shape
+
+            D0 = wrangler.evaluate_self(actx, places, skel_tgt_src_index, 0, 0)
+            D0 = cluster(make_flat_cluster_diag(D0, skel_tgt_src_index), parent)
+
+            D = D1 - D0
+
+        parent = clevel
+
+        assert D1.shape == (skeletons[k].nclusters,)
+        assert D1.shape == D0.shape, (D1.shape, D0.shape)
+        assert D1.shape == D.shape, (D1.shape, D.shape)
+
+        for i in range(skeletons[k].nclusters):
+            assert D1[i].shape == skeletons[k].tgt_src_index.cluster_shape(i, i)
+            assert D1[i].shape == D0[i].shape, (D1[i].shape, D0[i].shape)
+
+            error = la.norm(D[i] - skeletons[k].D[i]) / (la.norm(D[i]) + 1.0e-12)
+            logger.info("level %04d / %04d cluster %3d (%4d, %4d) error %.12e",
+                k, ctree.nlevels,
+                ctree.tree_cluster_parent_ids[clevel.box_ids][i],
+                *skeletons[k].tgt_src_index.cluster_shape(i, i), error)
+
+            assert error < 1.0e-15
+
+        logger.info("")
+
+    # }}}
+
+# }}}
+
+
 # {{{ test_skeletonize_by_proxy