From e21546111bbcb42daec795e7739398f716ff5be0 Mon Sep 17 00:00:00 2001 From: Guillaume Fraux Date: Thu, 27 Jun 2024 17:45:38 +0200 Subject: [PATCH] Transform cartesian tensors into spherical ones --- .../api/python/utils/clebsch-gordan.rst | 5 + .../rascaline-torch/rascaline/torch/utils.py | 1 + .../tests/utils/cartesian_spherical.py | 98 ++++ python/rascaline/rascaline/utils/__init__.py | 6 +- python/rascaline/rascaline/utils/_backend.py | 3 + python/rascaline/rascaline/utils/_dispatch.py | 17 +- .../utils/clebsch_gordan/__init__.py | 2 + .../clebsch_gordan/_cartesian_spherical.py | 440 ++++++++++++++++++ .../utils/clebsch_gordan/_coefficients.py | 159 +++++-- .../clebsch_gordan/_correlate_density.py | 2 +- .../tests/utils/cartesian_spherical.py | 200 ++++++++ .../tests/utils/correlate_density.py | 4 +- 12 files changed, 895 insertions(+), 42 deletions(-) create mode 100644 python/rascaline-torch/tests/utils/cartesian_spherical.py create mode 100644 python/rascaline/rascaline/utils/clebsch_gordan/_cartesian_spherical.py create mode 100644 python/rascaline/tests/utils/cartesian_spherical.py diff --git a/docs/src/references/api/python/utils/clebsch-gordan.rst b/docs/src/references/api/python/utils/clebsch-gordan.rst index 7d1258348..d417dbba2 100644 --- a/docs/src/references/api/python/utils/clebsch-gordan.rst +++ b/docs/src/references/api/python/utils/clebsch-gordan.rst @@ -3,3 +3,8 @@ Clebsch-Gordan products .. autoclass:: rascaline.utils.DensityCorrelations :members: + + +.. autofunction:: rascaline.utils.cartesian_to_spherical + +.. autofunction:: rascaline.utils.calculate_cg_coefficients diff --git a/python/rascaline-torch/rascaline/torch/utils.py b/python/rascaline-torch/rascaline/torch/utils.py index 1700af0b4..6d986cf52 100644 --- a/python/rascaline-torch/rascaline/torch/utils.py +++ b/python/rascaline-torch/rascaline/torch/utils.py @@ -38,6 +38,7 @@ module.__dict__["Array"] = torch.Tensor module.__dict__["CalculatorBase"] = CalculatorModule module.__dict__["IntoSystem"] = System +module.__dict__["BACKEND_IS_METATENSOR_TORCH"] = True def is_labels(obj: Any): diff --git a/python/rascaline-torch/tests/utils/cartesian_spherical.py b/python/rascaline-torch/tests/utils/cartesian_spherical.py new file mode 100644 index 000000000..dea2cbe96 --- /dev/null +++ b/python/rascaline-torch/tests/utils/cartesian_spherical.py @@ -0,0 +1,98 @@ +import pytest +import torch +from metatensor.torch import Labels, TensorBlock, TensorMap + +from rascaline.torch.utils.clebsch_gordan import cartesian_to_spherical + + +@pytest.fixture +def cartesian(): + # the first block is completely symmetric + values_1 = torch.rand(10, 4, 3, 3, 3, 2, dtype=torch.float64) + values_1[:, :, 0, 1, 0, :] = values_1[:, :, 0, 0, 1, :] + values_1[:, :, 1, 0, 0, :] = values_1[:, :, 0, 0, 1, :] + + values_1[:, :, 0, 2, 0, :] = values_1[:, :, 0, 0, 2, :] + values_1[:, :, 2, 0, 0, :] = values_1[:, :, 0, 0, 2, :] + + values_1[:, :, 1, 0, 1, :] = values_1[:, :, 0, 1, 1, :] + values_1[:, :, 1, 1, 0, :] = values_1[:, :, 0, 1, 1, :] + + values_1[:, :, 2, 0, 2, :] = values_1[:, :, 0, 2, 2, :] + values_1[:, :, 2, 2, 0, :] = values_1[:, :, 0, 2, 2, :] + + values_1[:, :, 2, 1, 2, :] = values_1[:, :, 2, 2, 1, :] + values_1[:, :, 1, 2, 2, :] = values_1[:, :, 2, 2, 1, :] + + values_1[:, :, 1, 2, 1, :] = values_1[:, :, 1, 1, 2, :] + values_1[:, :, 2, 1, 1, :] = values_1[:, :, 1, 1, 2, :] + + values_1[:, :, 0, 2, 1, :] = values_1[:, :, 0, 1, 2, :] + values_1[:, :, 2, 0, 1, :] = values_1[:, :, 0, 1, 2, :] + values_1[:, :, 1, 0, 2, :] = values_1[:, :, 0, 1, 2, :] + values_1[:, :, 1, 2, 0, :] = values_1[:, :, 0, 1, 2, :] + values_1[:, :, 2, 1, 0, :] = values_1[:, :, 0, 1, 2, :] + + block_1 = TensorBlock( + values=values_1, + samples=Labels.range("s", 10), + components=[ + Labels.range("other", 4), + Labels.range("xyz_1", 3), + Labels.range("xyz_2", 3), + Labels.range("xyz_3", 3), + ], + properties=Labels.range("p", 2), + ) + + # second block does not have any specific symmetry + block_2 = TensorBlock( + values=torch.rand(12, 6, 3, 3, 3, 7, dtype=torch.float64), + samples=Labels.range("s", 12), + components=[ + Labels.range("other", 6), + Labels.range("xyz_1", 3), + Labels.range("xyz_2", 3), + Labels.range("xyz_3", 3), + ], + properties=Labels.range("p", 7), + ) + + return TensorMap(Labels.range("key", 2), [block_1, block_2]) + + +def test_torch_script(): + torch.jit.script(cartesian_to_spherical) + + +def test_cartesian_to_spherical(cartesian): + # rank 1 + spherical = cartesian_to_spherical(cartesian, components=["xyz_1"]) + + assert spherical.component_names == ["other", "o3_mu", "xyz_2", "xyz_3"] + assert spherical.keys.names == ["o3_lambda", "o3_sigma", "key"] + assert len(spherical.keys) == 2 + + # rank 2 + spherical = cartesian_to_spherical(cartesian, components=["xyz_1", "xyz_2"]) + + assert spherical.component_names == ["other", "o3_mu", "xyz_3"] + assert spherical.keys.names == ["o3_lambda", "o3_sigma", "key"] + assert len(spherical.keys) == 5 + + # rank 3 + spherical = cartesian_to_spherical( + cartesian, components=["xyz_1", "xyz_2", "xyz_3"] + ) + + assert spherical.component_names == ["other", "o3_mu"] + assert spherical.keys.names == [ + "o3_lambda", + "o3_sigma", + "l_3", + "k_1", + "l_2", + "l_1", + "key", + ] + assert len(spherical.keys) == 10 diff --git a/python/rascaline/rascaline/utils/__init__.py b/python/rascaline/rascaline/utils/__init__.py index b6bfe14ff..755e889c3 100644 --- a/python/rascaline/rascaline/utils/__init__.py +++ b/python/rascaline/rascaline/utils/__init__.py @@ -1,6 +1,10 @@ import os -from .clebsch_gordan import DensityCorrelations # noqa +from .clebsch_gordan import ( # noqa + DensityCorrelations, + calculate_cg_coefficients, + cartesian_to_spherical, +) from .power_spectrum import PowerSpectrum # noqa from .splines import ( # noqa AtomicDensityBase, diff --git a/python/rascaline/rascaline/utils/_backend.py b/python/rascaline/rascaline/utils/_backend.py index 5f2b198a0..3869b9b28 100644 --- a/python/rascaline/rascaline/utils/_backend.py +++ b/python/rascaline/rascaline/utils/_backend.py @@ -39,6 +39,8 @@ class TorchScriptClass: Array = Union[np.ndarray, TorchTensor] +BACKEND_IS_METATENSOR_TORCH = False + __all__ = [ "Array", "CalculatorBase", @@ -53,4 +55,5 @@ class TorchScriptClass: "torch_jit_is_scripting", "torch_jit_export", "is_labels", + "BACKEND_IS_METATENSOR_TORCH", ] diff --git a/python/rascaline/rascaline/utils/_dispatch.py b/python/rascaline/rascaline/utils/_dispatch.py index 1472e50a9..91c242d9f 100644 --- a/python/rascaline/rascaline/utils/_dispatch.py +++ b/python/rascaline/rascaline/utils/_dispatch.py @@ -437,8 +437,7 @@ def imag(array): """ Takes the imag part of the array - This function has the same behavior as - ``np.imag(array)`` or ``torch.imag(array)``. + This function has the same behavior as ``np.imag(array)`` or ``torch.imag(array)``. """ if isinstance(array, TorchTensor): return torch.imag(array) @@ -446,3 +445,17 @@ def imag(array): return np.imag(array) else: raise TypeError(UNKNOWN_ARRAY_TYPE) + + +def roll(array, shifts: List[int], axis: List[int]): + """ + Roll array elements along a given axis. + + This function has the same behavior as ``np.roll(array)`` or ``torch.roll(array)``. + """ + if isinstance(array, TorchTensor): + return torch.roll(array, shifts=shifts, dims=axis) + elif isinstance(array, np.ndarray): + return np.roll(array, shift=shifts, axis=axis) + else: + raise TypeError(UNKNOWN_ARRAY_TYPE) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py index 264186c65..aa3fa5ad2 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/__init__.py @@ -1 +1,3 @@ +from ._cartesian_spherical import cartesian_to_spherical # noqa: F401 +from ._coefficients import calculate_cg_coefficients # noqa: F401 from ._correlate_density import DensityCorrelations # noqa: F401 diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_cartesian_spherical.py b/python/rascaline/rascaline/utils/clebsch_gordan/_cartesian_spherical.py new file mode 100644 index 000000000..614502210 --- /dev/null +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_cartesian_spherical.py @@ -0,0 +1,440 @@ +""" +This module contains utilities to convert cartesian TensorMap to spherical and +respectively. +""" + +from typing import List, Optional + +import numpy as np + +from .. import _dispatch +from .._backend import ( + Array, + Labels, + TensorBlock, + TensorMap, + TorchTensor, + torch_jit_is_scripting, +) +from . import _coefficients + + +def cartesian_to_spherical( + tensor: TensorMap, + components: List[str], + keep_l_in_keys: Optional[bool] = None, + remove_blocks_threshold: Optional[float] = 1e-9, + cg_backend: Optional[str] = None, + cg_coefficients: Optional[TensorMap] = None, +) -> TensorMap: + """ + Transform a ``tensor`` of arbitrary rank from cartesian form to a spherical form. + + Starting from a tensor on a basis of product of cartesian coordinates, this function + computes the same tensor using a basis of spherical harmonics ``Y^M_L``. For + example, a rank 1 tensor with a single "xyz" component would be represented as a + single L=1 spherical harmonic; while a rank 5 tensor using a product basis ``ℝ^3 ⊗ + ℝ^3 ⊗ ℝ^3 ⊗ ℝ^3 ⊗ ℝ^3`` would require multiple blocks up to L=5 spherical harmonics. + + A single :py:class:`TensorBlock` in the input might correspond to multiple + :py:class:`TensorBlock` in the output. The output keys will contain all the + dimensions of the input keys, plus ``o3_lambda`` (indicating the spherical harmonics + degree) and ``o3_sigma`` (indicating that this block is a proper- or improper tensor + with ``+1`` and ``-1`` respectively). If ``keep_l_in_keys`` is ``True`` or if the + input tensor is a tensor of rank 3 or more, the keys will also contain multiple + ``l_{i}`` and ``k_{i}`` dimensions, which indicate which angular momenta have been + coupled together in which order to get this block. + + ``components`` specifies which ones of the components of the input + :py:class:`TensorMap` should be transformed from cartesian to spherical. All these + components will be replaced in the output by a single ``o3_mu`` component, + corresponding to the spherical harmonics ``M``. + + By default, symmetric tensors will only contain blocks corresponding to + ``o3_sigma=1``. This is achieved by checking the norm of the blocks after the full + calculation; and dropping any block with a norm below ``remove_blocks_epsilon``. To + keep all blocks regardless of their norm, you can set + ``remove_blocks_epsilon=None``. + + :param tensor: input tensor, using a cartesian product basis + :param components: components of the input tensor to transform into spherical + components + :param keep_l_in_keys: should the output contains the values of angular momenta that + where combined together? This defaults to ``False`` for rank 1 and 2 tensors, + and ``True`` for all other tensors. + + Keys named ``l_{i}`` correspond to the input ``components``, with ``l_1`` being + the last entry in ``components`` and ``l_N`` the first one. Keys named ``k_{i}`` + correspond to intermediary spherical components created during the calculation, + i.e. a ``k_{i}`` used to be ``o3_lambda``. + :param remove_blocks_threshold: Numerical tolerance to use when determining if a + block's norm is zero or not. Blocks with zero norm will be excluded from the + output. Set this to ``None`` to keep all blocks in the output. + :param cg_backend: Backend to use for Clebsch-Gordan calculations. This can be + ``"python-dense"`` or ``"python-sparse"`` for dense or sparse operations + respectively. If ``None``, this is automatically determined. + :param cg_coefficients: Cache containing Clebsch-Gordan coefficients. This is + optional except when using this function from TorchScript. The coefficients + should be computed with :py:func:`calculate_cg_coefficients`, using the same + ``cg_backend`` as this function. + + :return: :py:class:`TensorMap` containing spherical components instead of cartesian + components. + """ + if len(tensor) == 0 or len(components) == 0: + # nothing to do + return tensor + + if not isinstance(components, list): + raise TypeError(f"`components` should be a list, got {type(components)}") + + if keep_l_in_keys is None: + if len(components) < 3: + keep_l_in_keys = False + else: + keep_l_in_keys = True + + axes_to_convert: List[int] = [] + all_component_names = tensor.component_names + for component in components: + if component in all_component_names: + idx = all_component_names.index(component) + axes_to_convert.append(idx + 1) + else: + raise ValueError(f"'{component}' is not part of this tensor components") + + for key, block in tensor.items(): + for idx in axes_to_convert: + values_list = _dispatch.to_int_list(block.components[idx - 1].values[:, 0]) + if values_list != [0, 1, 2]: + name = block.components[idx - 1].names[0] + raise ValueError( + f"component '{name}' in block for {key.print()} should have " + f"[0, 1, 2] as values, got {values_list} instead" + ) + + # we need components to be consecutive + if list(range(axes_to_convert[0], axes_to_convert[-1] + 1)) != axes_to_convert: + raise ValueError( + f"this function only supports consecutive components, {components} are not" + ) + + key_names = tensor.keys.names + if "o3_lambda" in key_names: + raise ValueError( + "this tensor already has an `o3_lambda` key, " + "is it already in spherical form?" + ) + + for i in range(len(components)): + if f"l_{i}" in key_names: + raise ValueError( + f"this tensor already has an `l_{i}` key, " + "is it already in spherical form?" + ) + + tensor_rank = len(components) + if tensor_rank > 2 and not keep_l_in_keys: + raise ValueError( + "`keep_l_in_keys` must be `True` for tensors of rank 3 and above" + ) + + if torch_jit_is_scripting(): + use_torch = True + else: + if isinstance(tensor.block(0).values, TorchTensor): + use_torch = True + elif isinstance(tensor.block(0).values, np.ndarray): + use_torch = False + else: + raise TypeError( + f"unknown array type in tensor ({type(tensor.block(0).values)}), " + "only numpy and torch are supported" + ) + + if cg_backend is None: + # TODO: benchmark & change the default? + if use_torch: + cg_backend = "python-dense" + else: + cg_backend = "python-sparse" + + new_component_names: List[str] = [] + for idx in range(len(tensor.component_names)): + if idx + 1 in axes_to_convert: + if len(axes_to_convert) == 1: + new_component_names.append("o3_mu") + else: + new_component_names.append(f"__internal_to_convert_{idx}") + else: + new_component_names.append("") + + # Step 1: transform xyz dimensions to o3_lambda=1 dimensions + # This is done with `roll`, since (y, z, x) is the same as m = (-1, 0, 1) + new_blocks: List[TensorBlock] = [] + for block in tensor.blocks(): + values = _dispatch.roll( + block.values, + shifts=[-1] * len(axes_to_convert), + axis=axes_to_convert, + ) + + new_components: List[Labels] = [] + for idx, component in enumerate(block.components): + if idx + 1 in axes_to_convert: + new_components.append( + Labels( + names=[new_component_names[idx]], + values=_dispatch.int_array_like( + [[-1], [0], [1]], component.values + ), + ) + ) + else: + new_components.append(component) + + new_blocks.append( + TensorBlock(values, block.samples, new_components, block.properties) + ) + + if tensor_rank == 1: + new_keys = tensor.keys + # we are done, add o3_lambda/o3_sigma/l_1 to the keys & return + if keep_l_in_keys: + new_keys = new_keys.insert( + 0, + "l_1", + _dispatch.int_array_like([1] * len(new_keys), new_keys.values), + ) + + new_keys = new_keys.insert( + 0, + "o3_sigma", + _dispatch.int_array_like([1] * len(tensor.keys), tensor.keys.values), + ) + + new_keys = new_keys.insert( + 0, + "o3_lambda", + _dispatch.int_array_like([1] * len(tensor.keys), tensor.keys.values), + ) + + return TensorMap(new_keys, new_blocks) + + # Step 2: if there is more than one dimension, couple them with CG coefficients + # + # We start from an array of shape [..., 3, 3, 3, 3, 3, ...] with as many 3 as + # `len(components)`. Then we iteratively combine the two rightmost components into + # as many new lambda/mu entries as required, until there is only one component left. + # Each step will create multiple blocks (corresponding to the different o3_lambda + # created by combining two o3_lambda=1 terms), that might on their turn create more + # blocks if more combinations are required. + # + # For example, with a rank 3 tensor we go through the following: + # + # - Step 1: [..., 3, 3, 3, ...] => [..., 3, 1, ...] (o3_lambda=0, o3_sigma=+1) + # => [..., 3, 3, ...] (o3_lambda=1, o3_sigma=-1) + # => [..., 3, 5, ...] (o3_lambda=2, o3_sigma=+1) + # + # - Step 2: [..., 3, 1, ...] => [..., 3, ...] (o3_lambda=1, o3_sigma=+1) + # [..., 3, 3, ...] => [..., 1, ...] (o3_lambda=0, o3_sigma=-1) + # => [..., 3, ...] (o3_lambda=1, o3_sigma=+1) + # => [..., 5, ...] (o3_lambda=2, o3_sigma=-1) + # [..., 3, 5, ...] => [..., 3, ...] (o3_lambda=1, o3_sigma=+1) + # => [..., 5, ...] (o3_lambda=2, o3_sigma=-1) + # => [..., 7, ...] (o3_lambda=3, o3_sigma=+1) + if cg_coefficients is None: + if torch_jit_is_scripting(): + raise ValueError( + "in TorchScript mode, `cg_coefficients` must be pre-computed " + "and given to this function explicitly" + ) + else: + cg_coefficients = _coefficients.calculate_cg_coefficients( + lambda_max=len(axes_to_convert), + cg_backend=cg_backend, + use_torch=use_torch, + ) + + iteration_index = 0 + while len(axes_to_convert) > 1: + tensor = _do_coupling( + tensor=tensor, + component_1=axes_to_convert[-2] - 1, + component_2=axes_to_convert[-1] - 1, + cg_coefficients=cg_coefficients, + cg_backend=cg_backend, + keep_l_in_keys=keep_l_in_keys, + iteration_index=iteration_index, + ) + + axes_to_convert.pop() + iteration_index += 1 + + if remove_blocks_threshold is None: + return tensor + + # Step 3: for symmetry reasons, some of the blocks will be zero everywhere (for + # example o3_sigma=-1 blocks if the input tensor is fully symmetric). If the user + # gave us a threshold, we remove all blocks with a norm below this threshold. + new_keys_values: List[Array] = [] + new_blocks: List[TensorBlock] = [] + for key_idx, block in enumerate(tensor.blocks()): + key = tensor.keys.entry(key_idx) + values = block.values.reshape(-1, 1) + norm = values.T @ values + if norm > remove_blocks_threshold: + new_keys_values.append(key.values.reshape(1, -1)) + new_blocks.append( + TensorBlock( + values=block.values, + samples=block.samples, + components=block.components, + properties=block.properties, + ) + ) + + return TensorMap( + Labels(tensor.keys.names, _dispatch.concatenate(new_keys_values, axis=0)), + new_blocks, + ) + + +def _do_coupling( + tensor: TensorMap, + component_1: int, + component_2: int, + keep_l_in_keys: bool, + iteration_index: int, + cg_coefficients: TensorMap, + cg_backend: str, +) -> TensorMap: + """ + Go from an uncoupled product basis that behave like a product of spherical harmonics + to a coupled basis that behaves like a single spherical harmonic. + + This function takes in a :py:class:`TensorMap` where two of the components + (indicated by ``component_1`` and ``component_2``) behave like spherical harmonics + ``Y^m1_l1`` and ``Y^m2_l2``, and project it onto a single spherical harmonic + ``Y^M_L``. This transformation uses the following relation: + + ``|L M> = |l1 l2 L M> = \\sum_{m1 m2} |l1 m1> |l2 m2>`` + + where ```` are Clebsch-Gordan coefficients. + + The output will contain many blocks for each block in the input, matching all the + different ``L`` (called ``o3_lambda`` in the code) required to do a full projection. + + This process can be iterated: a multi-dimensional array that is the product of many + ``Y^m_l`` can be turned into a set of multiple terms transforming as a single + ``Y^M_L``. + + :param tensor: input :py:class:`TensorMap` + :param components_1: first component of the ``tensor`` behaving like spherical + harmonics + :param components_2: second component of the ``tensor`` behaving like spherical + harmonics + :param keep_l_in_keys: whether ``l1`` and ``l2`` (the original spherical harmonic + degrees) should be kept in the keys. This can be useful to undo this + transformation (or even required if there is more than one path to get to a + given value for ``o3_lambda``) + :param iteration_index: when iterating the coupling, this should be the number of + iterations already done (i.e. the number of time this function has been called) + :param cg_coefficients: pre-computed set of Clebsch-Gordan coefficients + :param cg_backend: which backend to use for the calculations + + :return: :py:class:`TensorMap` using the coupled basis. This will contain the same + keys as the input ``tensor``, plus ``o3_lambda``. The components in positions + ``components_1`` and ``components_2`` will be replaced by a single ``o3_mu`` + component. + """ + assert component_2 == component_1 + 1 + + new_keys = tensor.keys + + if "o3_lambda" in tensor.keys.names: + old_sigmas = new_keys.column("o3_sigma") + new_keys = new_keys.remove("o3_sigma") + else: + old_sigmas = _dispatch.int_array_like([1] * len(new_keys), new_keys.values) + + if keep_l_in_keys: + array_of_ones = _dispatch.int_array_like([1] * len(new_keys), new_keys.values) + if "o3_lambda" in tensor.keys.names: + assert iteration_index > 0 + new_keys = new_keys.rename("o3_lambda", f"k_{iteration_index}") + new_keys = new_keys.insert(0, f"l_{iteration_index + 2}", array_of_ones) + else: + assert iteration_index == 0 + new_keys = new_keys.insert(0, "l_1", array_of_ones) + new_keys = new_keys.insert(0, "l_2", array_of_ones) + + new_keys_values: List[List[int]] = [] + new_blocks: List[TensorBlock] = [] + for key_idx, block in enumerate(tensor.blocks()): + key = new_keys.entry(key_idx) + old_sigma = int(old_sigmas[key_idx]) + + # get l1, l2 from the block's shape + block_shape = block.values.shape + l1 = (block_shape[component_1 + 1] - 1) // 2 + l2 = (block_shape[component_2 + 1] - 1) // 2 + + # reshape the values to look like (n_s, 2*l1 + 1, 2*l2 + 1, n_p) + shape_before = 1 + for axis in range(component_1 + 1): + shape_before *= block_shape[axis] + + shape_after = 1 + for axis in range(component_2 + 2, len(block_shape)): + shape_after *= block_shape[axis] + + array = block.values.reshape( + shape_before, + block.values.shape[component_1 + 1], + block.values.shape[component_2 + 1], + shape_after, + ) + + # generate the set of o3_lambda to compute + o3_lambdas = list(range(max(l1, l2) - min(l1, l2), (l1 + l2) + 1)) + + # actual calculation + outputs = _coefficients.cg_couple( + array, o3_lambdas, cg_coefficients, cg_backend + ) + + # create one block for each output of `cg_couple` + for o3_lambda, values in zip(o3_lambdas, outputs): + o3_sigma = int(old_sigma * (-1) ** (l1 + l2 + o3_lambda)) + new_keys_values.append( + [o3_lambda, o3_sigma] + _dispatch.to_int_list(key.values) + ) + + new_shape = list(block.values.shape) + new_shape.pop(component_2 + 1) + new_shape[component_1 + 1] = 2 * o3_lambda + 1 + + new_components = block.components + new_components.pop(component_2) + new_components[component_1] = Labels( + "o3_mu", + _dispatch.int_array_like( + [[mu] for mu in range(-o3_lambda, o3_lambda + 1)], new_keys.values + ), + ) + + new_blocks.append( + TensorBlock( + values.reshape(new_shape), + samples=block.samples, + components=new_components, + properties=block.properties, + ) + ) + + new_keys = Labels( + ["o3_lambda", "o3_sigma"] + new_keys.names, + _dispatch.int_array_like(new_keys_values, new_keys.values), + ) + return TensorMap(new_keys, new_blocks) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_coefficients.py b/python/rascaline/rascaline/utils/clebsch_gordan/_coefficients.py index 2d48ed423..7913b7099 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_coefficients.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_coefficients.py @@ -10,7 +10,13 @@ import wigners from .. import _dispatch -from .._backend import Array, Labels, TensorBlock, TensorMap +from .._backend import ( + BACKEND_IS_METATENSOR_TORCH, + Array, + Labels, + TensorBlock, + TensorMap, +) try: @@ -41,37 +47,34 @@ class torch_device: def calculate_cg_coefficients( lambda_max: int, - sparse: bool = True, - use_torch: bool = False, + cg_backend: str, + use_torch: bool = BACKEND_IS_METATENSOR_TORCH, ) -> TensorMap: """ - Calculates the Clebsch-Gordan coefficients for all possible combination of l1 and - l2, up to ``lambda_max``. Returns them in :py:class:`TensorMap` format. + Calculates the Clebsch-Gordan coefficients for all possible combination of angular + momenta up to ``lambda_max``. - The output data structure of the output :py:class:`TensorMap` depends on whether the - backend used to perform CG tensor products uses sparse or dense operations. + The structure of the returned :py:class:`TensorMap` depends on whether the backend + used to perform CG tensor products uses sparse or dense operations. Dense: - - samples: `_`, i.e. a dummy sample. - - components: `[(m1,), (m2,), (mu,)]`, i.e. on separate components axes, - where `m1` and `m2` are the m component values for the two arrays - being combined and `mu` is the m component value for the resulting - array. - - properties: `cg_coefficient` + - samples: ``_``, i.e. a dummy sample. + - components: ``[m1, m2, mu]`` on separate components axes, where ``m1`` and + ``m2`` are the m component values for the two arrays being combined and ``mu`` + is the m component value for the resulting array. + - properties: ``cg_coefficient = [0]`` Sparse: - - samples: `(m1, m2, mu)`, where `m1` and `m2` are the m component - values for the two arrays being combined and `mu` is the m component - value for the resulting array. - - components: `[]`, i.e. no components axis. - - properties: `cg_coefficient` - + - samples: ``(m1, m2, mu)``, where ``m1`` and ``m2`` are the m component values + for the two arrays being combined and ``mu`` is the m component value for the + resulting array. + - components: ``[]``, i.e. no components axis. + - properties: ``cg_coefficient = [0]`` :param lambda_max: maximum angular momentum value to compute CG coefficients for. - :param sparse: whether to store the CG coefficients in sparse format. - :param use_torch: whether torch tensor or numpy arrays should be used for the cg - coeffs - + :param cg_backend: TODO + :param use_torch: whether torch tensor or numpy arrays should be used to store the + coefficients :returns: :py:class:`TensorMap` of the Clebsch-Gordan coefficients. """ # Build some 'like' arrays for the backend dispatch @@ -97,14 +100,19 @@ def calculate_cg_coefficients( # Build the CG cache depending on whether the CG backend is sparse or dense. The # dispatching of the arrays backends are accounted for by `double_like` and # `labels_values_like`. - if sparse: + if cg_backend == "python-sparse": return _cg_coeff_dict_to_tensormap_sparse( cg_coeff_dict, double_like, labels_values_like ) - - return _cg_coeff_dict_to_tensormap_dense( - cg_coeff_dict, double_like, labels_values_like - ) + elif cg_backend == "python-dense": + return _cg_coeff_dict_to_tensormap_dense( + cg_coeff_dict, double_like, labels_values_like + ) + else: + raise ValueError( + f"invalid `cg_backend`, got '{cg_backend}', " + "only 'python-dense', or 'python-sparse' are supported" + ) def _build_dense_cg_coeff_dict( @@ -395,6 +403,83 @@ def _complex_clebsch_gordan_matrix(l1: int, l2: int, o3_lambda: int, like: Array # ================================================================ # +def cg_couple( + array: Array, + o3_lambdas: List[int], + cg_coefficients: TensorMap, + cg_backend: str, +) -> List[Array]: + """ + Go from an uncoupled product basis that behave like a product of spherical harmonics + to a coupled basis that behaves like a single spherical harmonic. + + The ``array`` shape should be ``(n_samples, 2 * l1 + 1, 2 * l2 + 1, n_q)``. + ``n_samples`` is the number of samples, and ``n_q`` is the number of properties. + + This function will output a list of arrays, whose shape will be ``[n_samples, (2 * + o3_lambda+1), n_q]``, with the requested ``o3_lambdas``. + + These arrays will contain the result of projecting from a product of spherical + harmonic with degree ``l1`` and ``l2`` to a single spherical harmonic of degree + ``o3_lambdas``; using Clebsch-Gordan coefficients. The values for ``l1`` and ``l2`` + are inferred from the size of the middle axis of the input array. + + The Clebsch-Gordan coefficients are cached in ``cg_coefficients``. These must be + computed by :py:func:`calculate_cg_coefficients`. + + The operation is dispatched such that numpy arrays or torch tensors are + automatically handled. + + :param array: input array with shape ``[n_samples, 2 * l1 + 1, 2 * l2 + 1, n_q]`` + :param o3_lambdas: list of degrees of spherical harmonics to compute + :param cg_coefficients: CG coefficients as returned by + :py:func:`calculate_cg_coefficients` with the same ``cg_backed`` given to this + function + :param cg_backend: specifies the backend to use for the calculation. It can be + ``"python-dense"``, or ``"python-sparse"``. + + :return: A list of array, one for each ``o3_lambda`` + """ + + assert len(array.shape) == 4 + + l1 = (array.shape[1] - 1) // 2 + l2 = (array.shape[2] - 1) // 2 + + if cg_backend == "python-sparse": + arrays = {} + for m1 in range(2 * l1 + 1): + for m2 in range(2 * l2 + 1): + arrays[str((m1, m2))] = array[:, m1, m2, :] + + return [ + _cg_couple_sparse(arrays, l1, l2, o3_lambda, cg_coefficients) + for o3_lambda in o3_lambdas + ] + elif cg_backend == "python-dense": + results = [] + + n_samples = array.shape[0] + n_properties = array.shape[3] + + array = array.swapaxes(1, 3) + array = array.reshape(n_samples * n_properties, 2 * l2 + 1, 2 * l1 + 1) + + for o3_lambda in o3_lambdas: + result = _cg_couple_dense(array, o3_lambda, cg_coefficients) + result = result.reshape(n_samples, n_properties, -1) + result = result.swapaxes(1, 2) + results.append(result) + + return results + + else: + raise ValueError( + f"invalid `cg_backend`, got '{cg_backend}', " + "only 'python-dense', or 'python-sparse' are supported" + ) + + def _cg_couple_sparse( arrays: Dict[str, Array], l1: int, @@ -414,7 +499,7 @@ def _cg_couple_sparse( ``[samples, properties]`` and correspond to a single ``(m1, m2)`` pair. :param o3_lambda: value of lambda for the output spherical harmonic :param cg_coefficients: CG coefficients as returned by - :py:func:`calculate_cg_coefficients` with ``sparse=True`` + :py:func:`calculate_cg_coefficients` with ``cg_backed="python-sparse"`` """ array = list(arrays.values())[0] @@ -450,7 +535,7 @@ def _cg_couple_dense( :param array: input array, we expect a shape of ``[samples, 2*l1 + 1, 2*l2 + 1]`` :param o3_lambda: value of lambda for the output spherical harmonic :param cg_coefficients: CG coefficients as returned by - :py:func:`calculate_cg_coefficients` with ``sparse=False`` + :py:func:`calculate_cg_coefficients` with ``cg_backed="python-dense"`` """ assert len(array.shape) == 3 @@ -490,7 +575,7 @@ def cg_tensor_product( samples in both array must be the same. This function will output a list of arrays, whose shape will be ``[n_samples, (2 * - o3_lambda+1), n_q * n_p]``, with the specified ``o3_lambdas``. These arrays will + o3_lambda+1), n_q * n_p]``, with the requested ``o3_lambdas``. These arrays will contain the result of computing a tensor product of ``array_1`` and ``array_2``, followed by a projection from the product of spherical harmonic with degree ``l1`` and ``l2`` to a single spherical harmonic of degree ``o3_lambdas``; using @@ -516,11 +601,11 @@ def cg_tensor_product( :py:func:`calculate_cg_coefficients`. Only used if ``cg_backend`` is not ``"metadata"``. :param cg_backend: specifies the backend to use for the calculation. It can be - ``"python-dense"``, ``"python-sparse"``, and ``"metadata"``. If + ``"python-dense"``, ``"python-sparse"``, or ``"metadata"``. If ``"python-dense"`` or ``"python-sparse"`` is chosen, a dense or sparse - combination (respectively) of the arrays is performed using either numpy or - torch, depending on the arrays. If ``"metadata"`` is chosen, no combination is - performed, and an empty array of the correct shape is returned instead. + combination (respectively) of the arrays is performed. If ``"metadata"`` is + chosen, no combination is performed, and an empty array of the correct shape is + returned instead. :returns: list of arrays of shape ``[n_samples, (2*o3_lambda+1), n_q * n_p]`` """ @@ -537,8 +622,8 @@ def cg_tensor_product( return _cg_tensor_product_dense(array_1, array_2, o3_lambdas, cg_coefficients) else: raise ValueError( - f"Wrong cg_backend, got '{cg_backend}', " - "but only support 'metadata', 'python-dense', or 'python-sparse'" + f"invalid `cg_backend`, got '{cg_backend}', " + "only 'python-dense', 'python-sparse', or 'metadata' are supported" ) diff --git a/python/rascaline/rascaline/utils/clebsch_gordan/_correlate_density.py b/python/rascaline/rascaline/utils/clebsch_gordan/_correlate_density.py index f6aea4d9a..89c4e0db1 100644 --- a/python/rascaline/rascaline/utils/clebsch_gordan/_correlate_density.py +++ b/python/rascaline/rascaline/utils/clebsch_gordan/_correlate_density.py @@ -152,7 +152,7 @@ def __init__( self._max_angular = max_angular self._cg_coefficients = _coefficients.calculate_cg_coefficients( lambda_max=self._max_angular, - sparse=self._cg_backend == "python-sparse", + cg_backend=self._cg_backend, use_torch=(arrays_backend == "torch"), ) diff --git a/python/rascaline/tests/utils/cartesian_spherical.py b/python/rascaline/tests/utils/cartesian_spherical.py new file mode 100644 index 000000000..0a8745ae7 --- /dev/null +++ b/python/rascaline/tests/utils/cartesian_spherical.py @@ -0,0 +1,200 @@ +import numpy as np +import pytest +from metatensor import Labels, TensorBlock, TensorMap + +from rascaline.utils.clebsch_gordan import cartesian_to_spherical + + +@pytest.fixture +def cartesian(): + # the first block is completely symmetric + values_1 = np.random.rand(10, 4, 3, 3, 3, 2) + values_1[:, :, 0, 1, 0, :] = values_1[:, :, 0, 0, 1, :] + values_1[:, :, 1, 0, 0, :] = values_1[:, :, 0, 0, 1, :] + + values_1[:, :, 0, 2, 0, :] = values_1[:, :, 0, 0, 2, :] + values_1[:, :, 2, 0, 0, :] = values_1[:, :, 0, 0, 2, :] + + values_1[:, :, 1, 0, 1, :] = values_1[:, :, 0, 1, 1, :] + values_1[:, :, 1, 1, 0, :] = values_1[:, :, 0, 1, 1, :] + + values_1[:, :, 2, 0, 2, :] = values_1[:, :, 0, 2, 2, :] + values_1[:, :, 2, 2, 0, :] = values_1[:, :, 0, 2, 2, :] + + values_1[:, :, 2, 1, 2, :] = values_1[:, :, 2, 2, 1, :] + values_1[:, :, 1, 2, 2, :] = values_1[:, :, 2, 2, 1, :] + + values_1[:, :, 1, 2, 1, :] = values_1[:, :, 1, 1, 2, :] + values_1[:, :, 2, 1, 1, :] = values_1[:, :, 1, 1, 2, :] + + values_1[:, :, 0, 2, 1, :] = values_1[:, :, 0, 1, 2, :] + values_1[:, :, 2, 0, 1, :] = values_1[:, :, 0, 1, 2, :] + values_1[:, :, 1, 0, 2, :] = values_1[:, :, 0, 1, 2, :] + values_1[:, :, 1, 2, 0, :] = values_1[:, :, 0, 1, 2, :] + values_1[:, :, 2, 1, 0, :] = values_1[:, :, 0, 1, 2, :] + + block_1 = TensorBlock( + values=values_1, + samples=Labels.range("s", 10), + components=[ + Labels.range("other", 4), + Labels.range("xyz_1", 3), + Labels.range("xyz_2", 3), + Labels.range("xyz_3", 3), + ], + properties=Labels.range("p", 2), + ) + + # second block does not have any specific symmetry + block_2 = TensorBlock( + values=np.random.rand(12, 6, 3, 3, 3, 7), + samples=Labels.range("s", 12), + components=[ + Labels.range("other", 6), + Labels.range("xyz_1", 3), + Labels.range("xyz_2", 3), + Labels.range("xyz_3", 3), + ], + properties=Labels.range("p", 7), + ) + + return TensorMap(Labels.range("key", 2), [block_1, block_2]) + + +def test_cartesian_to_spherical_rank_1(cartesian): + spherical = cartesian_to_spherical(cartesian, components=["xyz_1"]) + + assert spherical.component_names == ["other", "o3_mu", "xyz_2", "xyz_3"] + assert spherical.keys.names == ["o3_lambda", "o3_sigma", "key"] + + spherical = cartesian_to_spherical( + cartesian, + components=["xyz_1"], + keep_l_in_keys=True, + ) + assert spherical.keys.names == ["o3_lambda", "o3_sigma", "l_1", "key"] + + +def test_cartesian_to_spherical_rank_2(cartesian): + spherical = cartesian_to_spherical(cartesian, components=["xyz_1", "xyz_2"]) + + assert spherical.component_names == ["other", "o3_mu", "xyz_3"] + assert spherical.keys == Labels( + ["o3_lambda", "o3_sigma", "key"], + np.array( + [ + # only o3_sigma=1 in the symmetric block + [0, 1, 0], + [2, 1, 0], + # all o3_sigma in the non-symmetric block + [0, 1, 1], + [1, -1, 1], + [2, 1, 1], + ] + ), + ) + + spherical = cartesian_to_spherical( + cartesian, + components=["xyz_1", "xyz_2"], + keep_l_in_keys=True, + remove_blocks_threshold=None, + ) + assert spherical.keys.names == ["o3_lambda", "o3_sigma", "l_2", "l_1", "key"] + # all blocks are kept + assert len(spherical.keys) == 6 + + +def test_cartesian_to_spherical_rank_3(cartesian): + spherical = cartesian_to_spherical( + cartesian, components=["xyz_1", "xyz_2", "xyz_3"] + ) + + assert spherical.component_names == ["other", "o3_mu"] + assert spherical.keys == Labels( + ["o3_lambda", "o3_sigma", "l_3", "k_1", "l_2", "l_1", "key"], + np.array( + [ + # only o3_sigma=1 for the symmetric block, but there are multiple "path" + # ("l_3", "k_1", "l_2", "l_1") that lead to o3_lambda=1 + [1, 1, 1, 0, 1, 1, 0], + [1, 1, 1, 2, 1, 1, 0], + [3, 1, 1, 2, 1, 1, 0], + # all possible o3_sigma for the non-symmetric block + [1, 1, 1, 0, 1, 1, 1], + [0, -1, 1, 1, 1, 1, 1], + [1, 1, 1, 1, 1, 1, 1], + [2, -1, 1, 1, 1, 1, 1], + [1, 1, 1, 2, 1, 1, 1], + [2, -1, 1, 2, 1, 1, 1], + [3, 1, 1, 2, 1, 1, 1], + ] + ), + ) + + spherical = cartesian_to_spherical( + cartesian, + components=["xyz_1", "xyz_2", "xyz_3"], + remove_blocks_threshold=None, + ) + # all blocks are kept, even those with norm=0 + assert len(spherical.keys) == 14 + + +@pytest.mark.parametrize("cg_backend", ["python-dense", "python-sparse"]) +@pytest.mark.parametrize( + "components", [["xyz_1"], ["xyz_1", "xyz_12"], ["xyz_1", "xyz_2", "xyz_3"]] +) +def test_cartesian_to_spherical_and_back(cartesian, components, cg_backend): + spherical = cartesian_to_spherical( + cartesian, + components=["xyz_1", "xyz_2", "xyz_3"], + keep_l_in_keys=True, + cg_backend=cg_backend, + ) + + assert "o3_lambda" in spherical.keys.names + # TODO: check for identity after spherical_to_cartesian + + +def test_cartesian_to_spherical_errors(cartesian): + message = "`components` should be a list, got " + with pytest.raises(TypeError, match=message): + cartesian_to_spherical(cartesian, components="xyz_1") + + message = "'1' is not part of this tensor components" + with pytest.raises(ValueError, match=message): + cartesian_to_spherical(cartesian, components=[1, 2]) + + message = "'not_there' is not part of this tensor components" + with pytest.raises(ValueError, match=message): + cartesian_to_spherical(cartesian, components=["not_there"]) + + message = ( + "this function only supports consecutive components, " + "\\['xyz_2', 'xyz_1'\\] are not" + ) + with pytest.raises(ValueError, match=message): + cartesian_to_spherical(cartesian, components=["xyz_2", "xyz_1"]) + + message = ( + "this function only supports consecutive components, " + "\\['xyz_1', 'xyz_3'\\] are not" + ) + with pytest.raises(ValueError, match=message): + cartesian_to_spherical(cartesian, components=["xyz_1", "xyz_3"]) + + message = ( + "component 'other' in block for \\(key=0\\) should have \\[0, 1, 2\\] " + "as values, got \\[0, 1, 2, 3\\] instead" + ) + with pytest.raises(ValueError, match=message): + cartesian_to_spherical(cartesian, components=["other"]) + + message = "`keep_l_in_keys` must be `True` for tensors of rank 3 and above" + with pytest.raises(ValueError, match=message): + cartesian_to_spherical( + cartesian, + components=["xyz_1", "xyz_2", "xyz_3"], + keep_l_in_keys=False, + ) diff --git a/python/rascaline/tests/utils/correlate_density.py b/python/rascaline/tests/utils/correlate_density.py index f8f9ad0b4..89fc8417f 100644 --- a/python/rascaline/tests/utils/correlate_density.py +++ b/python/rascaline/tests/utils/correlate_density.py @@ -376,7 +376,9 @@ def test_clebsch_gordan_orthogonality(l1, l2, arrays_backend): for details. """ cg_coeffs = calculate_cg_coefficients( - lambda_max=5, sparse=False, use_torch=arrays_backend == "torch" + lambda_max=5, + cg_backend="python-dense", + use_torch=arrays_backend == "torch", ) lam_min = abs(l1 - l2)