From 7265c5231ba95698577ebad8df9ab4a369ed336f Mon Sep 17 00:00:00 2001 From: Philip Loche Date: Mon, 26 Feb 2024 20:55:36 +0100 Subject: [PATCH] Rename "structures" -> "systems" (#110) --- .../src/dev-docs/utils/data/readers/index.rst | 8 +- .../dev-docs/utils/data/readers/structure.rst | 13 --- .../dev-docs/utils/data/readers/systems.rst | 13 +++ .../getting-started/custom_dataset_conf.rst | 20 ++--- docs/src/getting-started/override.rst | 2 +- docs/src/getting-started/usage.rst | 4 +- docs/static/qm9/eval.yaml | 2 +- docs/static/qm9/options.yaml | 4 +- examples/alchemical_model/eval.yaml | 2 +- examples/alchemical_model/options.yaml | 4 +- examples/ase/options.yaml | 4 +- examples/ase/run_ase.py | 8 +- examples/basic_usage/usage.sh | 2 +- src/metatensor/models/cli/eval.py | 40 ++++----- src/metatensor/models/cli/train.py | 28 +++--- .../experimental/alchemical_model/model.py | 8 +- .../tests/test_functionality.py | 4 +- .../alchemical_model/tests/test_invariance.py | 10 +-- .../alchemical_model/tests/test_regression.py | 18 ++-- .../experimental/alchemical_model/train.py | 18 ++-- .../utils/systems_to_torch_spex_dict.py | 12 +-- .../models/experimental/soap_bpnn/model.py | 2 +- .../soap_bpnn/tests/test_continue.py | 12 +-- .../soap_bpnn/tests/test_functionality.py | 18 ++-- .../soap_bpnn/tests/test_invariance.py | 10 +-- .../soap_bpnn/tests/test_regression.py | 18 ++-- .../models/experimental/soap_bpnn/train.py | 8 +- src/metatensor/models/utils/composition.py | 8 +- src/metatensor/models/utils/compute_loss.py | 6 +- src/metatensor/models/utils/data/__init__.py | 2 +- src/metatensor/models/utils/data/dataset.py | 24 +++-- .../models/utils/data/readers/__init__.py | 2 +- .../models/utils/data/readers/readers.py | 20 ++--- .../utils/data/readers/structures/__init__.py | 4 - .../utils/data/readers/systems/__init__.py | 4 + .../readers/{structures => systems}/ase.py | 8 +- .../models/utils/data/readers/targets/ase.py | 20 ++--- 
.../models/utils/data/writers/__init__.py | 8 +- .../models/utils/data/writers/xyz.py | 6 +- .../models/utils/extract_targets.py | 2 +- src/metatensor/models/utils/normalize.py | 16 ++-- src/metatensor/models/utils/omegaconf.py | 44 +++++---- tests/cli/test_train_model.py | 56 ++++++------ tests/resources/options.yaml | 2 +- .../data/structures/test_structures_ase.py | 12 +-- tests/utils/data/targets/test_targets_ase.py | 58 ++++++------ tests/utils/data/test_combine_dataloaders.py | 18 ++-- tests/utils/data/test_dataset.py | 14 +-- tests/utils/data/test_readers.py | 90 +++++++++---------- tests/utils/data/test_target_writers.py | 28 +++--- tests/utils/test_compute_loss.py | 22 +++-- tests/utils/test_model_io.py | 8 +- tests/utils/test_neighbor_list.py | 8 +- tests/utils/test_omegaconf.py | 52 +++++------ tests/utils/test_output_gradient.py | 40 ++++----- 55 files changed, 427 insertions(+), 447 deletions(-) delete mode 100644 docs/src/dev-docs/utils/data/readers/structure.rst create mode 100644 docs/src/dev-docs/utils/data/readers/systems.rst delete mode 100644 src/metatensor/models/utils/data/readers/structures/__init__.py create mode 100644 src/metatensor/models/utils/data/readers/systems/__init__.py rename src/metatensor/models/utils/data/readers/{structures => systems}/ase.py (70%) diff --git a/docs/src/dev-docs/utils/data/readers/index.rst b/docs/src/dev-docs/utils/data/readers/index.rst index cd7b9a458..fa48e7864 100644 --- a/docs/src/dev-docs/utils/data/readers/index.rst +++ b/docs/src/dev-docs/utils/data/readers/index.rst @@ -1,10 +1,10 @@ -Structure and Target data Readers +System and Target data Readers ================================= -The main entry point for reading structure and target information are the two reader +The main entry point for reading system and target information are the two reader functions -.. autofunction:: metatensor.models.utils.data.read_structures +.. autofunction:: metatensor.models.utils.data.read_systems ..
autofunction:: metatensor.models.utils.data.read_targets Target type specific readers @@ -28,5 +28,5 @@ these refer to their documentation .. toctree:: :maxdepth: 1 - structure + systems targets diff --git a/docs/src/dev-docs/utils/data/readers/structure.rst b/docs/src/dev-docs/utils/data/readers/structure.rst deleted file mode 100644 index bc1b1f6aa..000000000 --- a/docs/src/dev-docs/utils/data/readers/structure.rst +++ /dev/null @@ -1,13 +0,0 @@ -Structure Readers -################# - -Parsers for obtaining information from structures. All readers return a :py:class:`list` -of :py:class:`metatensor.torch.atomistic.System`. The mapping which reader is used for -which file type is stored in - -.. autodata:: metatensor.models.utils.data.readers.structures.STRUCTURE_READERS - -Implemented Readers -------------------- - -.. autofunction:: metatensor.models.utils.data.readers.structures.read_structures_ase diff --git a/docs/src/dev-docs/utils/data/readers/systems.rst b/docs/src/dev-docs/utils/data/readers/systems.rst new file mode 100644 index 000000000..ee5134ac0 --- /dev/null +++ b/docs/src/dev-docs/utils/data/readers/systems.rst @@ -0,0 +1,13 @@ +System Readers +################# + +Parsers for obtaining information from systems. All readers return a :py:class:`list` +of :py:class:`metatensor.torch.atomistic.System`. The mapping which reader is used for +which file type is stored in + +.. autodata:: metatensor.models.utils.data.readers.systems.SYSTEM_READERS + +Implemented Readers +------------------- + +.. autofunction:: metatensor.models.utils.data.readers.systems.read_systems_ase diff --git a/docs/src/getting-started/custom_dataset_conf.rst b/docs/src/getting-started/custom_dataset_conf.rst index 6041d542b..3ed6a6d23 100644 --- a/docs/src/getting-started/custom_dataset_conf.rst +++ b/docs/src/getting-started/custom_dataset_conf.rst @@ -12,7 +12,7 @@ parsing data for training.
Mandatory sections in the `options.yaml` file include - ``test_set`` - ``validation_set`` -Each section can follow a similar structure, with shorthand methods available to +Each section can follow a similar structure, with shorthand methods available to simplify dataset definitions. Minimal Configuration Example @@ -36,7 +36,7 @@ format, which is also valid for initial input: .. code-block:: yaml training_set: - structures: + systems: read_from: dataset.xyz file_format: .xyz length_unit: null @@ -61,13 +61,13 @@ format, which is also valid for initial input: Understanding the YAML Block ---------------------------- -The ``training_set`` is divided into sections ``structures`` and ``targets``: +The ``training_set`` is divided into sections ``systems`` and ``targets``: -Structures Section -^^^^^^^^^^^^^^^^^^ -Describes the structure data like positions and cell information. +Systems Section +^^^^^^^^^^^^^^^ +Describes the system data like positions and cell information. -:param read_from: The file containing structure data. +:param read_from: The file containing system data. :param file_format: The file format, guessed from the suffix if ``null`` or not provided. :param length_unit: The unit of lengths, optional but recommended for simulations. @@ -93,7 +93,7 @@ Target section parameters include: :param quantity: The target's quantity (e.g., ``energy``, ``dipole``). Currently only ``energy`` is supported. -:param read_from: The file for target data, defaults to the ``structures.read_from`` +:param read_from: The file for target data, defaults to the ``systems.read_from`` file if not provided. :param file_format: The file format, guessed from the suffix if not provided. :param key: The key for reading from the file, defaulting to the target section's name @@ -135,7 +135,7 @@ starting with a ``"- "`` (a dash and a space) ..
code-block:: yaml training_set: - - structures: + - systems: read_from: dataset_0.xyz length_unit: angstrom targets: @@ -143,7 +143,7 @@ starting with a ``"- "`` (a dash and a space) quantity: energy key: my_energy_label0 unit: eV - - structures: + - systems: read_from: dataset_1.xyz length_unit: angstrom targets: diff --git a/docs/src/getting-started/override.rst b/docs/src/getting-started/override.rst index 5ee395293..e48f92df4 100644 --- a/docs/src/getting-started/override.rst +++ b/docs/src/getting-started/override.rst @@ -35,7 +35,7 @@ hyperparameters. The adjustments for ``num_epochs`` and ``cutoff`` look like thi num_epochs: 200 training_set: - structures: "qm9_reduced_100.xyz" + systems: "qm9_reduced_100.xyz" targets: energy: key: "U0" diff --git a/docs/src/getting-started/usage.rst b/docs/src/getting-started/usage.rst index 3d236c7a2..c3465fe35 100644 --- a/docs/src/getting-started/usage.rst +++ b/docs/src/getting-started/usage.rst @@ -34,7 +34,7 @@ The sub-command to start a model training is metatensor-models train To train a model you have to define your options. This includes the specific -architecture you want to use and the data including the training structures and target +architecture you want to use and the data including the training systems and target values The default model and training hyperparameter for each model are listed in their @@ -67,7 +67,7 @@ The sub-command to evaluate an already trained model is metatensor-models eval Besides the trained `model`, you will also have to provide a file containing the -structure and possible target values for evaluation. The structure of this ``eval.yaml`` +system and possible target values for evaluation. The structure of this ``eval.yaml`` is exactly the same as for a dataset in the ``options.yaml`` file. ..
literalinclude:: ../../static/qm9/eval.yaml diff --git a/docs/static/qm9/eval.yaml b/docs/static/qm9/eval.yaml index ed9fedde3..49ff29ad0 100644 --- a/docs/static/qm9/eval.yaml +++ b/docs/static/qm9/eval.yaml @@ -1,4 +1,4 @@ -structures: "qm9_reduced_100.xyz" # file where the positions are stored +systems: "qm9_reduced_100.xyz" # file where the positions are stored targets: energy: key: "U0" # name of the target value diff --git a/docs/static/qm9/options.yaml b/docs/static/qm9/options.yaml index 76280f1e8..aa1cb4786 100644 --- a/docs/static/qm9/options.yaml +++ b/docs/static/qm9/options.yaml @@ -2,10 +2,10 @@ architecture: name: experimental.soap_bpnn -# Mandatory section defining the parameters for structure and target data of the +# Mandatory section defining the parameters for system and target data of the # training set training_set: - structures: "qm9_reduced_100.xyz" # file where the positions are stored + systems: "qm9_reduced_100.xyz" # file where the positions are stored targets: energy: key: "U0" # name of the target value diff --git a/examples/alchemical_model/eval.yaml b/examples/alchemical_model/eval.yaml index 48402197f..4b3a00a9d 100644 --- a/examples/alchemical_model/eval.yaml +++ b/examples/alchemical_model/eval.yaml @@ -1,4 +1,4 @@ -structures: "alchemical_reduced_10.xyz" # file where the positions are stored +systems: "alchemical_reduced_10.xyz" # file where the positions are stored targets: energy: key: "energy" # name of the target value diff --git a/examples/alchemical_model/options.yaml b/examples/alchemical_model/options.yaml index 422292d07..0cd9f3d1e 100644 --- a/examples/alchemical_model/options.yaml +++ b/examples/alchemical_model/options.yaml @@ -4,10 +4,10 @@ architecture: training: num_epochs: 10 -# Mandatory section defining the parameters for structure and target data of the +# Mandatory section defining the parameters for system and target data of the # training set training_set: - structures: "alchemical_reduced_10.xyz" # file 
where the positions are stored + systems: "alchemical_reduced_10.xyz" # file where the positions are stored targets: energy: key: "energy" # name of the target value diff --git a/examples/ase/options.yaml b/examples/ase/options.yaml index 93417caa7..c3819ee59 100644 --- a/examples/ase/options.yaml +++ b/examples/ase/options.yaml @@ -5,9 +5,9 @@ architecture: num_epochs: 100 learning_rate: 0.01 -# Section defining the parameters for structure and target data +# Section defining the parameters for system and target data training_set: - structures: "ethanol_reduced_100.xyz" + systems: "ethanol_reduced_100.xyz" targets: energy: key: "energy" diff --git a/examples/ase/run_ase.py b/examples/ase/run_ase.py index 902b10786..1c7550a38 100644 --- a/examples/ase/run_ase.py +++ b/examples/ase/run_ase.py @@ -4,7 +4,7 @@ This tutorial demonstrates how to use an already trained and exported model to run an ASE simulation of a single ethanol molecule in vacuum. We use a model that was trained -using the :ref:`architecture-soap-bpnn` architecture on 100 ethanol structures +using the :ref:`architecture-soap-bpnn` architecture on 100 ethanol systems containing energies and forces. You can obtain the :download:`dataset file ` used in this example from our website. The dataset is a subset of the `rMD17 dataset @@ -148,8 +148,8 @@ # %% # -# Inspect the structures -# ###################### +# Inspect the systems +# ################### # # Even though the total energy is conserved, we also have to verify that the ethanol # molecule is stable and the bonds did not break. @@ -165,7 +165,7 @@ # As a final analysis we also calculate and plot the carbon-hydrogen radial distribution # function (RDF) from the trajectory and compare this to the RDF from the training set. # -# To use the RDF code from ase we first have to define a unit cell for our structures. +# To use the RDF code from ase we first have to define a unit cell for our systems. # We choose a cubic one with a side length of 10 Å. 
for atoms in training_frames: diff --git a/examples/basic_usage/usage.sh b/examples/basic_usage/usage.sh index af4dd84a4..26adb30e7 100755 --- a/examples/basic_usage/usage.sh +++ b/examples/basic_usage/usage.sh @@ -13,7 +13,7 @@ metatensor-models train --help metatensor-models eval model.pt eval.yaml # The evaluation command predicts those properties the model was trained against; here -# "U0". The predictions together with the structures have been written in a file named +# "U0". The predictions together with the systems have been written in a file named # ``output.xyz`` in the current directory. The written file starts with the following # lines diff --git a/src/metatensor/models/cli/eval.py b/src/metatensor/models/cli/eval.py index 591d14e85..6d3247540 100644 --- a/src/metatensor/models/cli/eval.py +++ b/src/metatensor/models/cli/eval.py @@ -9,7 +9,7 @@ from omegaconf import DictConfig, OmegaConf from ..utils.compute_loss import compute_model_loss -from ..utils.data import collate_fn, read_structures, read_targets, write_predictions +from ..utils.data import collate_fn, read_systems, read_targets, write_predictions from ..utils.errors import ArchitectureError from ..utils.extract_targets import get_outputs_dict from ..utils.info import finalize_aggregated_info, update_aggregated_info @@ -63,18 +63,18 @@ def _add_eval_model_parser(subparser: argparse._SubParsersAction) -> None: def _eval_targets(model, dataset: Union[_BaseDataset, torch.utils.data.Subset]) -> None: """Evaluate an exported model on a dataset and print the RMSEs for each target.""" - # Attach neighbor lists to the structures: + # Attach neighbor lists to the systems: requested_neighbor_lists = model.requested_neighbors_lists() # working around https://github.com/lab-cosmo/metatensor/issues/521 # Desired: - # for structure, _ in dataset: - # attach_neighbor_lists(structure, requested_neighbors_lists) + # for system, _ in dataset: + # attach_neighbor_lists(system, requested_neighbors_lists) # 
Current: dataloader = torch.utils.data.DataLoader( dataset, batch_size=1, collate_fn=collate_fn ) - for (structure,), _ in dataloader: - get_system_with_neighbors_lists(structure, requested_neighbor_lists) + for (system,), _ in dataloader: + get_system_with_neighbors_lists(system, requested_neighbor_lists) # Extract all the possible outputs and their gradients from the dataset: outputs_dict = get_outputs_dict([dataset]) @@ -103,8 +103,8 @@ def _eval_targets(model, dataset: Union[_BaseDataset, torch.utils.data.Subset]) # Compute the RMSEs: aggregated_info: Dict[str, Tuple[float, int]] = {} for batch in dataloader: - structures, targets = batch - _, info = compute_model_loss(loss_fn, model, structures, targets) + systems, targets = batch + _, info = compute_model_loss(loss_fn, model, systems, targets) aggregated_info = update_aggregated_info(aggregated_info, info) finalized_info = finalize_aggregated_info(aggregated_info) @@ -182,39 +182,37 @@ def eval_model( file_index_suffix = f"_{i}" logger.info(f"Evaulate dataset{extra_log_message}") - eval_structures = read_structures( - filename=options["structures"]["read_from"], - fileformat=options["structures"]["file_format"], + eval_systems = read_systems( + filename=options["systems"]["read_from"], + fileformat=options["systems"]["file_format"], ) # Predict targets if hasattr(options, "targets"): eval_targets = read_targets(options["targets"]) - eval_dataset = Dataset( - structure=eval_structures, energy=eval_targets["energy"] - ) + eval_dataset = Dataset(system=eval_systems, energy=eval_targets["energy"]) _eval_targets(model, eval_dataset) else: # TODO: batch this # TODO: add forces/stresses/virials if requested - # Attach neighbors list to structures. This step is only required if no + # Attach neighbors list to systems. This step is only required if no # targets are present. Otherwise, the neighbors list have been already # attached in `_eval_targets`. 
- eval_structures = [ + eval_systems = [ get_system_with_neighbors_lists( - structure, model.requested_neighbors_lists() + system, model.requested_neighbors_lists() ) - for structure in eval_structures + for system in eval_systems ] - # Predict structures + # Predict systems try: # `length_unit` is only required for unit conversions in MD engines and # superflous here. eval_options = ModelEvaluationOptions( length_unit="", outputs=model.capabilities().outputs ) - predictions = model(eval_structures, eval_options, check_consistency=True) + predictions = model(eval_systems, eval_options, check_consistency=True) except Exception as e: raise ArchitectureError(e) @@ -222,5 +220,5 @@ def eval_model( write_predictions( filename=f"{output.stem}{file_index_suffix}{output.suffix}", predictions=predictions, - structures=eval_structures, + systems=eval_systems, ) diff --git a/src/metatensor/models/cli/train.py b/src/metatensor/models/cli/train.py index 57aa175c2..93f7e8772 100644 --- a/src/metatensor/models/cli/train.py +++ b/src/metatensor/models/cli/train.py @@ -20,7 +20,7 @@ from omegaconf.errors import ConfigKeyError from .. 
import CONFIG_PATH -from ..utils.data import get_all_species, read_structures, read_targets +from ..utils.data import get_all_species, read_systems, read_targets from ..utils.data.dataset import _train_test_random_split from ..utils.errors import ArchitectureError from ..utils.export import export @@ -237,15 +237,15 @@ def _train_model_hydra(options: DictConfig) -> None: train_datasets = [] for train_options in train_options_list: - train_structures = read_structures( - filename=train_options["structures"]["read_from"], - fileformat=train_options["structures"]["file_format"], + train_systems = read_systems( + filename=train_options["systems"]["read_from"], + fileformat=train_options["systems"]["file_format"], dtype=torch.get_default_dtype(), ) train_targets = read_targets( conf=train_options["targets"], dtype=torch.get_default_dtype() ) - train_datasets.append(Dataset(structure=train_structures, **train_targets)) + train_datasets.append(Dataset(system=train_systems, **train_targets)) train_size = 1.0 @@ -288,15 +288,15 @@ def _train_model_hydra(options: DictConfig) -> None: ) for test_options in test_options_list: - test_structures = read_structures( - filename=test_options["structures"]["read_from"], - fileformat=test_options["structures"]["file_format"], + test_systems = read_systems( + filename=test_options["systems"]["read_from"], + fileformat=test_options["systems"]["file_format"], dtype=torch.get_default_dtype(), ) test_targets = read_targets( conf=test_options["targets"], dtype=torch.get_default_dtype() ) - test_dataset = Dataset(structure=test_structures, **test_targets) + test_dataset = Dataset(system=test_systems, **test_targets) test_datasets.append(test_dataset) logger.info("Setting up validation set") @@ -339,16 +339,16 @@ def _train_model_hydra(options: DictConfig) -> None: ) for validation_options in validation_options_list: - validation_structures = read_structures( - filename=validation_options["structures"]["read_from"], - 
fileformat=validation_options["structures"]["file_format"], + validation_systems = read_systems( + filename=validation_options["systems"]["read_from"], + fileformat=validation_options["systems"]["file_format"], dtype=torch.get_default_dtype(), ) validation_targets = read_targets( conf=validation_options["targets"], dtype=torch.get_default_dtype() ) validation_dataset = Dataset( - structure=validation_structures, **validation_targets + system=validation_systems, **validation_targets ) validation_datasets.append(validation_dataset) @@ -369,7 +369,7 @@ def _train_model_hydra(options: DictConfig) -> None: for train_options in train_options_list for key, value in train_options["targets"].items() } - length_unit = train_options_list[0]["structures"]["length_unit"] + length_unit = train_options_list[0]["systems"]["length_unit"] requested_capabilities = ModelCapabilities( length_unit=length_unit if length_unit is not None else "", species=all_species, diff --git a/src/metatensor/models/experimental/alchemical_model/model.py b/src/metatensor/models/experimental/alchemical_model/model.py index 9a1ba2587..d0eab958e 100644 --- a/src/metatensor/models/experimental/alchemical_model/model.py +++ b/src/metatensor/models/experimental/alchemical_model/model.py @@ -84,9 +84,9 @@ def forward(self, systems: List[System]): cell_shifts=batch_dict["cell_shifts"], centers=batch_dict["centers"], pairs=batch_dict["pairs"], - structure_centers=batch_dict["structure_centers"], - structure_pairs=batch_dict["structure_pairs"], - structure_offsets=batch_dict["structure_offsets"], + structure_centers=batch_dict["system_centers"], + structure_pairs=batch_dict["system_pairs"], + structure_offsets=batch_dict["system_offsets"], ) power_spectrum = self.ps_calculator(spex) return power_spectrum @@ -118,7 +118,7 @@ def __init__( ) if output.per_atom: raise ValueError( - "Alchemical Model only supports per-structure outputs, " + "Alchemical Model only supports per-system outputs, " "but a per-atom output 
was provided" ) diff --git a/src/metatensor/models/experimental/alchemical_model/tests/test_functionality.py b/src/metatensor/models/experimental/alchemical_model/tests/test_functionality.py index 30138ef58..303d91d7a 100644 --- a/src/metatensor/models/experimental/alchemical_model/tests/test_functionality.py +++ b/src/metatensor/models/experimental/alchemical_model/tests/test_functionality.py @@ -28,8 +28,8 @@ def test_prediction_subset(): ) alchemical_model = Model(capabilities, DEFAULT_HYPERS["model"]) - structure = ase.Atoms("O2", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) - system = rascaline.torch.systems_to_torch(structure).to(torch.get_default_dtype()) + system = ase.Atoms("O2", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) + system = rascaline.torch.systems_to_torch(system).to(torch.get_default_dtype()) system = get_system_with_neighbors_lists( system, alchemical_model.requested_neighbors_lists() ) diff --git a/src/metatensor/models/experimental/alchemical_model/tests/test_invariance.py b/src/metatensor/models/experimental/alchemical_model/tests/test_invariance.py index 2738ac952..af3665391 100644 --- a/src/metatensor/models/experimental/alchemical_model/tests/test_invariance.py +++ b/src/metatensor/models/experimental/alchemical_model/tests/test_invariance.py @@ -30,16 +30,16 @@ def test_rotational_invariance(): }, ) alchemical_model = Model(capabilities, DEFAULT_HYPERS["model"]) - structure = ase.io.read(DATASET_PATH) - original_structure = copy.deepcopy(structure) - structure.rotate(48, "y") - original_system = rascaline.torch.systems_to_torch(original_structure).to( + system = ase.io.read(DATASET_PATH) + original_system = copy.deepcopy(system) + system.rotate(48, "y") + original_system = rascaline.torch.systems_to_torch(original_system).to( torch.get_default_dtype() ) original_system = get_system_with_neighbors_lists( original_system, alchemical_model.requested_neighbors_lists() ) - system = 
rascaline.torch.systems_to_torch(structure).to(torch.get_default_dtype()) + system = rascaline.torch.systems_to_torch(system).to(torch.get_default_dtype()) system = get_system_with_neighbors_lists( system, alchemical_model.requested_neighbors_lists() ) diff --git a/src/metatensor/models/experimental/alchemical_model/tests/test_regression.py b/src/metatensor/models/experimental/alchemical_model/tests/test_regression.py index 867f7bb50..efc7dd04d 100644 --- a/src/metatensor/models/experimental/alchemical_model/tests/test_regression.py +++ b/src/metatensor/models/experimental/alchemical_model/tests/test_regression.py @@ -15,7 +15,7 @@ from metatensor.models.experimental.alchemical_model import DEFAULT_HYPERS, Model, train from metatensor.models.utils.data import get_all_species -from metatensor.models.utils.data.readers import read_structures, read_targets +from metatensor.models.utils.data.readers import read_systems, read_targets from metatensor.models.utils.neighbors_lists import get_system_with_neighbors_lists from . 
import DATASET_PATH @@ -41,11 +41,11 @@ def test_regression_init(): ) alchemical_model = Model(capabilities, DEFAULT_HYPERS["model"]) - # Predict on the first fivestructures - structures = ase.io.read(DATASET_PATH, ":5") + # Predict on the first five systems + systems = ase.io.read(DATASET_PATH, ":5") systems = [ - rascaline.torch.systems_to_torch(structure).to(torch.get_default_dtype()) - for structure in structures + rascaline.torch.systems_to_torch(system).to(torch.get_default_dtype()) + for system in systems ] systems = [ get_system_with_neighbors_lists( @@ -84,7 +84,7 @@ def test_regression_train(): np.random.seed(0) torch.manual_seed(0) - structures = read_structures(DATASET_PATH, dtype=torch.get_default_dtype()) + systems = read_systems(DATASET_PATH, dtype=torch.get_default_dtype()) conf = { "U0": { "quantity": "energy", @@ -97,7 +97,7 @@ def test_regression_train(): } } targets = read_targets(OmegaConf.create(conf), dtype=torch.get_default_dtype()) - dataset = Dataset(structure=structures, U0=targets["U0"]) + dataset = Dataset(system=systems, U0=targets["U0"]) hypers = DEFAULT_HYPERS.copy() hypers["training"]["num_epochs"] = 2 @@ -119,7 +119,7 @@ def test_regression_train(): hypers=hypers, ) - # Predict on the first five structures + # Predict on the first five systems evaluation_options = ModelEvaluationOptions( length_unit=alchemical_model.capabilities.length_unit, outputs=alchemical_model.capabilities.outputs, @@ -129,7 +129,7 @@ def test_regression_train(): alchemical_model.eval(), alchemical_model.capabilities ) output = model( - structures[:5], + systems[:5], evaluation_options, check_consistency=True, ) diff --git a/src/metatensor/models/experimental/alchemical_model/train.py b/src/metatensor/models/experimental/alchemical_model/train.py index 06bfe58bd..903cb438c 100644 --- a/src/metatensor/models/experimental/alchemical_model/train.py +++ b/src/metatensor/models/experimental/alchemical_model/train.py @@ -80,10 +80,10 @@ def train( 
requested_neighbor_lists = model.requested_neighbors_lists() for dataset in train_datasets + validation_datasets: for i in range(len(dataset)): - structure = dataset[i].structure - # The following line attached the neighbors lists to the structure, - # and doesn't require to reassign the structure to the dataset: - _ = get_system_with_neighbors_lists(structure, requested_neighbor_lists) + system = dataset[i].system + # The following line attached the neighbors lists to the system, + # and doesn't require to reassign the system to the dataset: + _ = get_system_with_neighbors_lists(system, requested_neighbor_lists) # Calculate the average number of atoms and neighbors in the training datasets: average_number_of_atoms = get_average_number_of_atoms(train_datasets) @@ -207,9 +207,9 @@ def train( train_loss = 0.0 for batch in train_dataloader: optimizer.zero_grad() - structures, targets = batch - assert len(structures[0].known_neighbors_lists()) > 0 - loss, info = compute_model_loss(loss_fn, model, structures, targets) + systems, targets = batch + assert len(systems[0].known_neighbors_lists()) > 0 + loss, info = compute_model_loss(loss_fn, model, systems, targets) train_loss += loss.item() loss.backward() optimizer.step() @@ -218,9 +218,9 @@ def train( validation_loss = 0.0 for batch in validation_dataloader: - structures, targets = batch + systems, targets = batch # TODO: specify that the model is not training here to save some autograd - loss, info = compute_model_loss(loss_fn, model, structures, targets) + loss, info = compute_model_loss(loss_fn, model, systems, targets) validation_loss += loss.item() aggregated_validation_info = update_aggregated_info( aggregated_validation_info, info diff --git a/src/metatensor/models/experimental/alchemical_model/utils/systems_to_torch_spex_dict.py b/src/metatensor/models/experimental/alchemical_model/utils/systems_to_torch_spex_dict.py index 63e46e3d6..4844001cf 100644 --- 
a/src/metatensor/models/experimental/alchemical_model/utils/systems_to_torch_spex_dict.py +++ b/src/metatensor/models/experimental/alchemical_model/utils/systems_to_torch_spex_dict.py @@ -45,9 +45,9 @@ def systems_to_torch_spex_dict( lenghts = torch.tensor([len(item) for item in systems], device=device) nl_lenghts = torch.tensor([len(item.values) for item in nls], device=device) index = torch.arange(len(systems), device=device) - structure_centers = torch.repeat_interleave(index, lenghts) - structure_pairs = torch.repeat_interleave(index, nl_lenghts) - structure_offsets = torch.cat( + system_centers = torch.repeat_interleave(index, lenghts) + system_pairs = torch.repeat_interleave(index, nl_lenghts) + system_offsets = torch.cat( [torch.tensor([0], device=device), torch.cumsum(lenghts[:-1], dim=0)] ) @@ -58,8 +58,8 @@ def systems_to_torch_spex_dict( "centers": centers, "pairs": pairs, "cell_shifts": cell_shifts, - "structure_centers": structure_centers, - "structure_pairs": structure_pairs, - "structure_offsets": structure_offsets, + "system_centers": system_centers, + "system_pairs": system_pairs, + "system_offsets": system_offsets, } return batch_dict diff --git a/src/metatensor/models/experimental/soap_bpnn/model.py b/src/metatensor/models/experimental/soap_bpnn/model.py index 0177e6fdc..9a0d5bb27 100644 --- a/src/metatensor/models/experimental/soap_bpnn/model.py +++ b/src/metatensor/models/experimental/soap_bpnn/model.py @@ -212,7 +212,7 @@ def __init__( ) if output.per_atom: raise ValueError( - "SOAP-BPNN only supports per-structure outputs, " + "SOAP-BPNN only supports per-system outputs, " "but a per-atom output was provided" ) diff --git a/src/metatensor/models/experimental/soap_bpnn/tests/test_continue.py b/src/metatensor/models/experimental/soap_bpnn/tests/test_continue.py index a02af0ff9..416464899 100644 --- a/src/metatensor/models/experimental/soap_bpnn/tests/test_continue.py +++ b/src/metatensor/models/experimental/soap_bpnn/tests/test_continue.py @@ 
-8,7 +8,7 @@ import metatensor.models from metatensor.models.experimental.soap_bpnn import DEFAULT_HYPERS, Model, train from metatensor.models.utils.data import get_all_species -from metatensor.models.utils.data.readers import read_structures, read_targets +from metatensor.models.utils.data.readers import read_systems, read_targets from metatensor.models.utils.model_io import save_model from . import DATASET_PATH @@ -21,7 +21,7 @@ def test_continue(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) shutil.copy(DATASET_PATH, "qm9_reduced_100.xyz") - structures = read_structures(DATASET_PATH, dtype=torch.get_default_dtype()) + systems = read_systems(DATASET_PATH, dtype=torch.get_default_dtype()) capabilities = ModelCapabilities( length_unit="Angstrom", @@ -35,7 +35,7 @@ def test_continue(monkeypatch, tmp_path): ) model_before = Model(capabilities, DEFAULT_HYPERS["model"]) output_before = model_before( - structures[:5], {"U0": model_before.capabilities.outputs["U0"]} + systems[:5], {"U0": model_before.capabilities.outputs["U0"]} ) save_model(model_before, "model.ckpt") @@ -52,7 +52,7 @@ def test_continue(monkeypatch, tmp_path): } } targets = read_targets(OmegaConf.create(conf)) - dataset = Dataset(structure=structures, U0=targets["U0"]) + dataset = Dataset(system=systems, U0=targets["U0"]) hypers = DEFAULT_HYPERS.copy() hypers["training"]["num_epochs"] = 0 @@ -71,9 +71,9 @@ def test_continue(monkeypatch, tmp_path): [dataset], [dataset], capabilities, hypers, continue_from="model.ckpt" ) - # Predict on the first five structures + # Predict on the first five systems output_after = model_after( - structures[:5], {"U0": model_after.capabilities.outputs["U0"]} + systems[:5], {"U0": model_after.capabilities.outputs["U0"]} ) assert metatensor.torch.allclose(output_before["U0"], output_after["U0"]) diff --git a/src/metatensor/models/experimental/soap_bpnn/tests/test_functionality.py b/src/metatensor/models/experimental/soap_bpnn/tests/test_functionality.py index 
402cf227d..baf9444ba 100644 --- a/src/metatensor/models/experimental/soap_bpnn/tests/test_functionality.py +++ b/src/metatensor/models/experimental/soap_bpnn/tests/test_functionality.py @@ -24,16 +24,16 @@ def test_prediction_subset_elements(): soap_bpnn = Model(capabilities, DEFAULT_HYPERS["model"]) - structure = ase.Atoms("O2", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) + system = ase.Atoms("O2", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.0]]) soap_bpnn( - [rascaline.torch.systems_to_torch(structure).to(torch.get_default_dtype())], + [rascaline.torch.systems_to_torch(system).to(torch.get_default_dtype())], {"energy": soap_bpnn.capabilities.outputs["energy"]}, ) def test_prediction_subset_atoms(): """Tests that the model can predict on a subset - of the atoms in a structure.""" + of the atoms in a system.""" capabilities = ModelCapabilities( length_unit="Angstrom", @@ -49,22 +49,22 @@ def test_prediction_subset_atoms(): soap_bpnn = Model(capabilities, DEFAULT_HYPERS["model"]) # Since we don't yet support atomic predictions, we will test this by - # predicting on a structure with two monomers at a large distance + # predicting on a system with two monomers at a large distance - structure_monomer = ase.Atoms( + system_monomer = ase.Atoms( "NO2", positions=[[0.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 0.0, 2.0]] ) energy_monomer = soap_bpnn( [ - rascaline.torch.systems_to_torch(structure_monomer).to( + rascaline.torch.systems_to_torch(system_monomer).to( torch.get_default_dtype() ) ], {"energy": soap_bpnn.capabilities.outputs["energy"]}, ) - structure_far_away_dimer = ase.Atoms( + system_far_away_dimer = ase.Atoms( "N2O4", positions=[ [0.0, 0.0, 0.0], @@ -83,7 +83,7 @@ def test_prediction_subset_atoms(): energy_dimer = soap_bpnn( [ - rascaline.torch.systems_to_torch(structure_far_away_dimer).to( + rascaline.torch.systems_to_torch(system_far_away_dimer).to( torch.get_default_dtype() ) ], @@ -92,7 +92,7 @@ def test_prediction_subset_atoms(): energy_monomer_in_dimer = 
soap_bpnn( [ - rascaline.torch.systems_to_torch(structure_far_away_dimer).to( + rascaline.torch.systems_to_torch(system_far_away_dimer).to( torch.get_default_dtype() ) ], diff --git a/src/metatensor/models/experimental/soap_bpnn/tests/test_invariance.py b/src/metatensor/models/experimental/soap_bpnn/tests/test_invariance.py index 0d20f209f..929e07952 100644 --- a/src/metatensor/models/experimental/soap_bpnn/tests/test_invariance.py +++ b/src/metatensor/models/experimental/soap_bpnn/tests/test_invariance.py @@ -25,16 +25,16 @@ def test_rotational_invariance(): ) soap_bpnn = Model(capabilities, DEFAULT_HYPERS["model"]).to(torch.float64) - structure = ase.io.read(DATASET_PATH) - original_structure = copy.deepcopy(structure) - structure.rotate(48, "y") + system = ase.io.read(DATASET_PATH) + original_system = copy.deepcopy(system) + system.rotate(48, "y") original_output = soap_bpnn( - [rascaline.torch.systems_to_torch(original_structure)], + [rascaline.torch.systems_to_torch(original_system)], {"energy": soap_bpnn.capabilities.outputs["energy"]}, ) rotated_output = soap_bpnn( - [rascaline.torch.systems_to_torch(structure)], + [rascaline.torch.systems_to_torch(system)], {"energy": soap_bpnn.capabilities.outputs["energy"]}, ) diff --git a/src/metatensor/models/experimental/soap_bpnn/tests/test_regression.py b/src/metatensor/models/experimental/soap_bpnn/tests/test_regression.py index 69c0d1083..310d7c840 100644 --- a/src/metatensor/models/experimental/soap_bpnn/tests/test_regression.py +++ b/src/metatensor/models/experimental/soap_bpnn/tests/test_regression.py @@ -10,7 +10,7 @@ from metatensor.models.experimental.soap_bpnn import DEFAULT_HYPERS, Model, train from metatensor.models.utils.data import get_all_species -from metatensor.models.utils.data.readers import read_structures, read_targets +from metatensor.models.utils.data.readers import read_systems, read_targets from . 
import DATASET_PATH @@ -36,13 +36,13 @@ def test_regression_init(): ) soap_bpnn = Model(capabilities, DEFAULT_HYPERS["model"]) - # Predict on the first fivestructures - structures = ase.io.read(DATASET_PATH, ":5") + # Predict on the first five systems + systems = ase.io.read(DATASET_PATH, ":5") output = soap_bpnn( [ - rascaline.torch.systems_to_torch(structure).to(torch.get_default_dtype()) - for structure in structures + rascaline.torch.systems_to_torch(system).to(torch.get_default_dtype()) + for system in systems ], {"U0": soap_bpnn.capabilities.outputs["U0"]}, ) @@ -57,7 +57,7 @@ def test_regression_train(): """Perform a regression test on the model when trained for 2 epoch on a small dataset""" - structures = read_structures(DATASET_PATH, dtype=torch.get_default_dtype()) + systems = read_systems(DATASET_PATH, dtype=torch.get_default_dtype()) conf = { "U0": { @@ -71,7 +71,7 @@ def test_regression_train(): } } targets = read_targets(OmegaConf.create(conf), dtype=torch.get_default_dtype()) - dataset = Dataset(structure=structures, U0=targets["U0"]) + dataset = Dataset(system=systems, U0=targets["U0"]) hypers = DEFAULT_HYPERS.copy() hypers["training"]["num_epochs"] = 2 @@ -88,8 +88,8 @@ def test_regression_train(): ) soap_bpnn = train([dataset], [dataset], capabilities, hypers) - # Predict on the first five structures - output = soap_bpnn(structures[:5], {"U0": soap_bpnn.capabilities.outputs["U0"]}) + # Predict on the first five systems + output = soap_bpnn(systems[:5], {"U0": soap_bpnn.capabilities.outputs["U0"]}) expected_output = torch.tensor( [[-40.5102], [-56.6547], [-76.4395], [-77.3478], [-93.3939]] diff --git a/src/metatensor/models/experimental/soap_bpnn/train.py b/src/metatensor/models/experimental/soap_bpnn/train.py index 80bcf2b4b..e40ae3cce 100644 --- a/src/metatensor/models/experimental/soap_bpnn/train.py +++ b/src/metatensor/models/experimental/soap_bpnn/train.py @@ -182,8 +182,8 @@ def train( train_loss = 0.0 for batch in train_dataloader: 
optimizer.zero_grad() - structures, targets = batch - loss, info = compute_model_loss(loss_fn, model, structures, targets) + systems, targets = batch + loss, info = compute_model_loss(loss_fn, model, systems, targets) train_loss += loss.item() loss.backward() optimizer.step() @@ -192,9 +192,9 @@ def train( validation_loss = 0.0 for batch in validation_dataloader: - structures, targets = batch + systems, targets = batch # TODO: specify that the model is not training here to save some autograd - loss, info = compute_model_loss(loss_fn, model, structures, targets) + loss, info = compute_model_loss(loss_fn, model, systems, targets) validation_loss += loss.item() aggregated_validation_info = update_aggregated_info( aggregated_validation_info, info diff --git a/src/metatensor/models/utils/composition.py b/src/metatensor/models/utils/composition.py index 47680ca5b..6979e7686 100644 --- a/src/metatensor/models/utils/composition.py +++ b/src/metatensor/models/utils/composition.py @@ -13,14 +13,14 @@ def calculate_composition_weights( ) -> Tuple[torch.Tensor, List[int]]: """Calculate the composition weights for a dataset. - For now, it assumes per-structure properties. + For now, it assumes per-system properties. :param dataset: Dataset to calculate the composition weights for. :returns: Composition weights for the dataset, as well as the list of species that the weights correspond to. """ - # Get the target for each structure in the dataset + # Get the target for each system in the dataset # TODO: the dataset will be iterable once metatensor PR #500 merged. targets = torch.stack( [ @@ -30,12 +30,12 @@ def calculate_composition_weights( ] ) - # Get the composition for each structure in the dataset + # Get the composition for each system in the dataset composition_calculator = rascaline.torch.AtomicComposition(per_structure=True) # TODO: the dataset will be iterable once metatensor PR #500 merged. 
composition_features = composition_calculator.compute( [ - dataset[sample_id]._asdict()["structure"] + dataset[sample_id]._asdict()["system"] for dataset in datasets for sample_id in range(len(dataset)) ] diff --git a/src/metatensor/models/utils/compute_loss.py b/src/metatensor/models/utils/compute_loss.py index d6b255d50..c4c7959f5 100644 --- a/src/metatensor/models/utils/compute_loss.py +++ b/src/metatensor/models/utils/compute_loss.py @@ -192,12 +192,12 @@ def _position_gradients_to_block(gradients_list): [ torch.concatenate( [ - torch.tensor([i] * len(structure)) - for i, structure in enumerate(gradients_list) + torch.tensor([i] * len(system)) + for i, system in enumerate(gradients_list) ] ), torch.concatenate( - [torch.arange(len(structure)) for structure in gradients_list] + [torch.arange(len(system)) for system in gradients_list] ), ], dim=1, diff --git a/src/metatensor/models/utils/data/__init__.py b/src/metatensor/models/utils/data/__init__.py index b3001833b..3ec5c324e 100644 --- a/src/metatensor/models/utils/data/__init__.py +++ b/src/metatensor/models/utils/data/__init__.py @@ -8,7 +8,7 @@ read_energy, read_forces, read_stress, - read_structures, + read_systems, read_targets, read_virial, ) diff --git a/src/metatensor/models/utils/data/dataset.py b/src/metatensor/models/utils/data/dataset.py index 4c1231bd2..14fdb06b0 100644 --- a/src/metatensor/models/utils/data/dataset.py +++ b/src/metatensor/models/utils/data/dataset.py @@ -32,9 +32,8 @@ def get_all_species(datasets: Union[_BaseDataset, List[_BaseDataset]]) -> List[i """ Returns the list of all species present in a dataset or list of datasets. - :param datasets: The dataset, or list of datasets. - - :return: The sorted list of species present in the datasets. + :param datasets: the dataset, or list of datasets. + :returns: The sorted list of species present in the datasets.
""" if not isinstance(datasets, list): @@ -44,8 +43,8 @@ def get_all_species(datasets: Union[_BaseDataset, List[_BaseDataset]]) -> List[i species = [] for dataset in datasets: for index in range(len(dataset)): - structure = dataset[index][0] # extract the structure from the NamedTuple - species += structure.species.tolist() + system = dataset[index][0] # extract the system from the NamedTuple + species += system.species.tolist() # Remove duplicates and sort: result = list(set(species)) @@ -58,11 +57,8 @@ def get_all_targets(dataset: _BaseDataset) -> List[str]: """ Returns the list of all targets present in the dataset. - Args: - dataset: The dataset. - - Returns: - The list of targets present in the dataset. + :param dataset: the dataset + :returns: list of targets present in the dataset. """ # The following does not work because the `dataset` can also @@ -73,7 +69,7 @@ def get_all_targets(dataset: _BaseDataset) -> List[str]: target_names = [] for index in range(len(dataset)): sample = dataset[index]._asdict() # NamedTuple -> dict - sample.pop("structure") # structure not needed + sample.pop("system") # system not needed target_names += list(sample.keys()) # Remove duplicates: @@ -83,13 +79,13 @@ def get_all_targets(dataset: _BaseDataset) -> List[str]: def collate_fn(batch: List[NamedTuple]) -> Tuple[List, Dict[str, TensorMap]]: """ Wraps the `metatensor-learn` default collate function `group_and_join` to - return the data fields as a list of structures, and a dictionary of nameed + return the data fields as a list of systems, and a dictionary of named targets.
""" collated_targets = group_and_join(batch)._asdict() - structures = collated_targets.pop("structure") - return structures, collated_targets + systems = collated_targets.pop("system") + return systems, collated_targets def check_datasets( diff --git a/src/metatensor/models/utils/data/readers/__init__.py b/src/metatensor/models/utils/data/readers/__init__.py index af424b55b..39ca25823 100644 --- a/src/metatensor/models/utils/data/readers/__init__.py +++ b/src/metatensor/models/utils/data/readers/__init__.py @@ -2,7 +2,7 @@ read_energy, read_forces, read_stress, - read_structures, + read_systems, read_targets, read_virial, ) diff --git a/src/metatensor/models/utils/data/readers/readers.py b/src/metatensor/models/utils/data/readers/readers.py index e28499c7c..306f6c6e7 100644 --- a/src/metatensor/models/utils/data/readers/readers.py +++ b/src/metatensor/models/utils/data/readers/readers.py @@ -7,7 +7,7 @@ from omegaconf import DictConfig from rascaline.torch.system import System -from .structures import STRUCTURE_READERS +from .systems import SYSTEM_READERS from .targets import ENERGY_READERS, FORCES_READERS, STRESS_READERS, VIRIAL_READERS @@ -42,7 +42,7 @@ def read_energy( :param filename: name of the file to read :param target_value: target value key name to be parsed from the file. - :param fileformat: format of the structure file. If :py:obj:`None` the format is + :param fileformat: format of the system file. If :py:obj:`None` the format is determined from the suffix :param dtype: desired data type of returned tensor :returns: target value stored stored as a :class:`metatensor.TensorBlock` @@ -66,7 +66,7 @@ def read_forces( :param filename: name of the file to read :param target_value: target value key name to be parsed from the file - :param fileformat: format of the structure file. If :py:obj:`None` the format is + :param fileformat: format of the system file. 
If :py:obj:`None` the format is determined from the suffix :param dtype: desired data type of returned tensor :returns: target value stored stored as a :class:`metatensor.TensorBlock` @@ -90,7 +90,7 @@ def read_stress( :param filename: name of the file to read :param target_value: target value key name to be parsed from the file. - :param fileformat: format of the structure file. If :py:obj:`None` the format is + :param fileformat: format of the system file. If :py:obj:`None` the format is determined from the suffix :param dtype: desired data type of returned tensor :returns: target value stored stored as a :class:`metatensor.TensorBlock` @@ -104,21 +104,21 @@ def read_stress( ) -def read_structures( +def read_systems( filename: str, fileformat: Optional[str] = None, dtype: torch.dtype = torch.float64, ) -> List[System]: - """Read structure informations from a file. + """Read system informations from a file. :param filename: name of the file to read - :param fileformat: format of the structure file. If :py:obj:`None` the format is + :param fileformat: format of the system file. If :py:obj:`None` the format is determined from the suffix. :param dtype: desired data type of returned tensor - :returns: list of structures + :returns: list of systems """ return _base_reader( - readers=STRUCTURE_READERS, + readers=SYSTEM_READERS, filename=filename, fileformat=fileformat, dtype=dtype, @@ -135,7 +135,7 @@ def read_virial( :param filename: name of the file to read :param target_value: target value key name to be parsed from the file. - :param fileformat: format of the structure file. If :py:obj:`None` the format is + :param fileformat: format of the system file. If :py:obj:`None` the format is determined from the suffix. 
:param dtype: desired data type of returned tensor :returns: target value stored stored as a :class:`metatensor.TensorBlock` diff --git a/src/metatensor/models/utils/data/readers/structures/__init__.py b/src/metatensor/models/utils/data/readers/structures/__init__.py deleted file mode 100644 index 78457ca29..000000000 --- a/src/metatensor/models/utils/data/readers/structures/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from .ase import read_structures_ase - -STRUCTURE_READERS = {".extxyz": read_structures_ase, ".xyz": read_structures_ase} -""":py:class:`dict`: dictionary mapping file suffixes to a structure reader""" diff --git a/src/metatensor/models/utils/data/readers/systems/__init__.py b/src/metatensor/models/utils/data/readers/systems/__init__.py new file mode 100644 index 000000000..9e2332b35 --- /dev/null +++ b/src/metatensor/models/utils/data/readers/systems/__init__.py @@ -0,0 +1,4 @@ +from .ase import read_systems_ase + +SYSTEM_READERS = {".extxyz": read_systems_ase, ".xyz": read_systems_ase} +""":py:class:`dict`: dictionary mapping file suffixes to a system reader""" diff --git a/src/metatensor/models/utils/data/readers/structures/ase.py b/src/metatensor/models/utils/data/readers/systems/ase.py similarity index 70% rename from src/metatensor/models/utils/data/readers/structures/ase.py rename to src/metatensor/models/utils/data/readers/systems/ase.py index 8fcf8ea66..de04a5d6e 100644 --- a/src/metatensor/models/utils/data/readers/structures/ase.py +++ b/src/metatensor/models/utils/data/readers/systems/ase.py @@ -6,16 +6,14 @@ from rascaline.torch.system import System, systems_to_torch -def read_structures_ase( - filename: str, dtype: torch.dtype = torch.float64 -) -> List[System]: - """Store structure informations using ase. +def read_systems_ase(filename: str, dtype: torch.dtype = torch.float64) -> List[System]: + """Store system informations using ase. 
:param filename: name of the file to read :param dtype: desired data type of returned tensor :returns: - A list of structures + A list of systems """ systems = [AseSystem(atoms) for atoms in ase.io.read(filename, ":")] diff --git a/src/metatensor/models/utils/data/readers/targets/ase.py b/src/metatensor/models/utils/data/readers/targets/ase.py index 0cb689579..85c6cc7ec 100644 --- a/src/metatensor/models/utils/data/readers/targets/ase.py +++ b/src/metatensor/models/utils/data/readers/targets/ase.py @@ -25,9 +25,9 @@ def read_energy_ase( properties = Labels.single() blocks = [] - for i_structure, atoms in enumerate(frames): + for i_system, atoms in enumerate(frames): values = torch.tensor([[atoms.info[key]]], dtype=dtype) - samples = Labels(["structure"], torch.tensor([[i_structure]])) + samples = Labels(["system"], torch.tensor([[i_system]])) block = TensorBlock( values=values, @@ -61,14 +61,14 @@ def read_forces_ase( properties = Labels.single() blocks = [] - for i_structure, atoms in enumerate(frames): + for i_system, atoms in enumerate(frames): # We store forces as positions gradients which means we invert the sign values = -torch.tensor(atoms.arrays[key], dtype=dtype) values = values.reshape(-1, 3, 1) samples = Labels( - ["sample", "structure", "atom"], - torch.tensor([[0, i_structure, a] for a in range(len(values))]), + ["sample", "system", "atom"], + torch.tensor([[0, i_system, a] for a in range(len(values))]), ) block = TensorBlock( @@ -150,20 +150,20 @@ def _read_virial_stress_ase( properties = Labels.single() blocks = [] - for i_structure, atoms in enumerate(frames): + for i_system, atoms in enumerate(frames): values = torch.tensor(atoms.info[key].tolist(), dtype=dtype) if values.shape == (9,): warnings.warn( - "Found 9-long numerical vector for the stress/virial in structure " - f"{i_structure}. Assume a row major format for the conversion into a " + "Found 9-long numerical vector for the stress/virial in system " + f"{i_system}. 
Assume a row major format for the conversion into a " "3 x 3 matrix.", stacklevel=2, ) elif values.shape != (3, 3): raise ValueError( - f"Values in structure {i_structure} has shape {values.shape}. " + f"Values in system {i_system} has shape {values.shape}. " "Stress/virial must be a 3 x 3 matrix or a 9-long numerical vector." ) @@ -174,7 +174,7 @@ def _read_virial_stress_ase( else: # is stress if atoms.cell.volume == 0: raise ValueError( - f"Structure {i_structure} has zero cell vectors. Stress can only " + f"system {i_system} has zero cell vectors. Stress can only " "be used if cell is non zero." ) values *= atoms.cell.volume diff --git a/src/metatensor/models/utils/data/writers/__init__.py b/src/metatensor/models/utils/data/writers/__init__.py index 8648690ea..492d9fb5d 100644 --- a/src/metatensor/models/utils/data/writers/__init__.py +++ b/src/metatensor/models/utils/data/writers/__init__.py @@ -14,17 +14,17 @@ def write_predictions( filename: str, predictions: TensorMap, - structures: List[System], + systems: List[System], fileformat: Optional[str] = None, ) -> None: """Writes predictions to a file. - For certain file suffixes also the structures will be written (i.e ``xyz``). + For certain file suffixes also the systems will be written (i.e ``xyz``). :param filename: name of the file to write :param predictions: :py:class:`metatensor.torch.TensorMap` containing the predictions that should be written - :param structures: list of structures that for some writers will also be written + :param systems: list of systems that for some writers will also be written :param fileformat: format of the target value file. If :py:obj:`None` the format is determined from the suffix.
""" @@ -36,4 +36,4 @@ def write_predictions( except KeyError: raise ValueError(f"fileformat '{fileformat}' is not supported") - return writer(filename, predictions, structures) + return writer(filename, predictions, systems) diff --git a/src/metatensor/models/utils/data/writers/xyz.py b/src/metatensor/models/utils/data/writers/xyz.py index 0c65e7b06..466355008 100644 --- a/src/metatensor/models/utils/data/writers/xyz.py +++ b/src/metatensor/models/utils/data/writers/xyz.py @@ -7,18 +7,18 @@ from rascaline.torch.system import System -def write_xyz(filename: str, predictions: TensorMap, structures: List[System]) -> None: +def write_xyz(filename: str, predictions: TensorMap, systems: List[System]) -> None: """An ase based xyz file writer :param filename: name of the file to read :param predictions: prediction values written to the file. - :param structures: strcutures additional written to the file. + :param systems: systems additionally written to the file. """ # Get the target property name: target_name = next(iter(predictions.keys())) frames = [] - for i_system, system in enumerate(structures): + for i_system, system in enumerate(systems): info = { target_name: float(predictions[target_name].block().values[i_system, 0]) } diff --git a/src/metatensor/models/utils/extract_targets.py b/src/metatensor/models/utils/extract_targets.py index d423e41c1..84abc5fc4 100644 --- a/src/metatensor/models/utils/extract_targets.py +++ b/src/metatensor/models/utils/extract_targets.py @@ -18,7 +18,7 @@ def get_outputs_dict(datasets: List[Union[Dataset, torch.utils.data.Subset]]): outputs_dict = {} for dataset in datasets: targets = next(iter(dataset))._asdict() - targets.pop("structure") # structure not needed + targets.pop("system") # system not needed # targets is now a dictionary of TensorMaps for target_name, target_tmap in targets.items(): diff --git a/src/metatensor/models/utils/normalize.py b/src/metatensor/models/utils/normalize.py index 4eb11cbf0..2b223f28d 100644 ---
a/src/metatensor/models/utils/normalize.py +++ b/src/metatensor/models/utils/normalize.py @@ -18,8 +18,8 @@ def get_average_number_of_atoms( for dataset in datasets: num_atoms = [] for i in range(len(dataset)): - structure = dataset[i].structure - num_atoms.append(len(structure)) + system = dataset[i].system + num_atoms.append(len(system)) average_number_of_atoms.append( torch.mean(torch.tensor(num_atoms).to(torch.get_default_dtype())) ) @@ -39,17 +39,15 @@ def get_average_number_of_neighbors( for dataset in datasets: num_neighbors = [] for i in range(len(dataset)): - structure = dataset[i].structure - known_neighbors_lists = structure.known_neighbors_lists() + system = dataset[i].system + known_neighbors_lists = system.known_neighbors_lists() if len(known_neighbors_lists) == 0: - raise ValueError( - f"Structure {structure} does not have a neighbors list" - ) + raise ValueError(f"system {system} does not have a neighbors list") elif len(known_neighbors_lists) > 1: raise ValueError( - "More than one neighbors list per structure is not yet supported" + "More than one neighbors list per system is not yet supported" ) - nl = structure.get_neighbors_list(known_neighbors_lists[0]) + nl = system.get_neighbors_list(known_neighbors_lists[0]) num_neighbors.append( torch.mean( torch.unique(nl.samples["first_atom"], return_counts=True)[1].to( diff --git a/src/metatensor/models/utils/omegaconf.py b/src/metatensor/models/utils/omegaconf.py index ba39a0e53..40af918f0 100644 --- a/src/metatensor/models/utils/omegaconf.py +++ b/src/metatensor/models/utils/omegaconf.py @@ -24,7 +24,7 @@ def _resolve_single_str(config): # BASE CONFIGURATIONS -CONF_STRUCTURES = OmegaConf.create( +CONF_SYSTEMS = OmegaConf.create( { "read_from": "${..read_from}", "file_format": "${file_format:}", @@ -36,7 +36,7 @@ def _resolve_single_str(config): CONF_TARGET_FIELDS = OmegaConf.create( { "quantity": "energy", - "read_from": "${...structures.read_from}", + "read_from": "${...systems.read_from}",
"file_format": "${file_format:}", "key": None, "unit": None, @@ -66,7 +66,7 @@ def expand_dataset_config(conf: Union[str, DictConfig, ListConfig]) -> ListConfi This function takes a dataset configuration, either as a string, DictConfig or a ListConfig, and expands it into a detailed configuration format. It processes - structures, targets, and gradient sections, setting default values and inferring + systems, targets, and gradient sections, setting default values and inferring missing information. Unknown keys are ignored, allowing for flexibility. If the dataset configuration is either a :class:`str` or a @@ -74,11 +74,11 @@ def expand_dataset_config(conf: Union[str, DictConfig, ListConfig]) -> ListConfi The function performs the following steps for each c - - Loads base configurations for structures, targets, and gradients from predefined + - Loads base configurations for systems, targets, and gradients from predefined YAML files. - Merges and interpolates the input configuration with the base configurations. - Expands shorthand notations like file paths or simple true/false settings to full - dictionary structures. + dictionary structures. - Handles special cases, such as the mandatory nature of the 'energy' section for MD simulations and the mutual exclusivity of 'stress' and 'virial' sections.
- Validates the final expanded configuration, particularly for gradient-related @@ -96,7 +96,7 @@ def expand_dataset_config(conf: Union[str, DictConfig, ListConfig]) -> ListConfi if isinstance(conf, str): read_from = conf conf = OmegaConf.create( - {"structures": read_from, "targets": {"energy": read_from}} + {"systems": read_from, "targets": {"energy": read_from}} ) # Expand DictConfig -> ListConfig @@ -105,14 +105,12 @@ def expand_dataset_config(conf: Union[str, DictConfig, ListConfig]) -> ListConfi # Perform expansion per config inside the ListConfig for conf_element in conf: - if hasattr(conf_element, "structures"): - if type(conf_element["structures"]) is str: - conf_element["structures"] = _resolve_single_str( - conf_element["structures"] - ) + if hasattr(conf_element, "systems"): + if type(conf_element["systems"]) is str: + conf_element["systems"] = _resolve_single_str(conf_element["systems"]) - conf_element["structures"] = OmegaConf.merge( - CONF_STRUCTURES, conf_element["structures"] + conf_element["systems"] = OmegaConf.merge( + CONF_SYSTEMS, conf_element["systems"] ) if hasattr(conf_element, "targets"): @@ -193,7 +191,7 @@ def check_units( :param desired_options: The dataset options ``actual_options`` is tested against. :raises ValueError: If the length units are not consistent between - the structure in the dataset options. + the system in the dataset options. :raises ValueError: If a target is present only in desider_option and not in actual_option. :raises ValueError: If the unit of a target quantity is not consistent between @@ -215,14 +213,14 @@ def check_units( desired_options, ): if ( - actual_options_element["structures"]["length_unit"] - != desired_options_element["structures"]["length_unit"] + actual_options_element["systems"]["length_unit"] + != desired_options_element["systems"]["length_unit"] ): raise ValueError( "`length_unit`s are inconsistent between one of the dataset options." 
- f" {actual_options_element['structures']['length_unit']!r} " + f" {actual_options_element['systems']['length_unit']!r} " "!= " - f"{desired_options_element['structures']['length_unit']!r}." + f"{desired_options_element['systems']['length_unit']!r}." ) for target in actual_options_element["targets"]: @@ -250,7 +248,7 @@ def check_options_list(dataset_config: ListConfig) -> None: This is useful if the dataset config is made of several datasets. - The function checks (1) if ``length_units`` in each structure section is the same. + The function checks (1) if ``length_units`` in each system section is the same. If the names of the ``"targets"`` sections are the same between the elements of the list of datasets also (2) the units must be the same. @@ -267,14 +265,14 @@ def check_options_list(dataset_config: ListConfig) -> None: for actual_config in dataset_config[1:]: if ( - actual_config["structures"]["length_unit"] - != desired_config["structures"]["length_unit"] + actual_config["systems"]["length_unit"] + != desired_config["systems"]["length_unit"] ): raise ValueError( "`length_unit`s are inconsistent between one of the dataset options." - f" {actual_config['structures']['length_unit']!r} " + f" {actual_config['systems']['length_unit']!r} " "!= " - f"{desired_config['structures']['length_unit']!r}." + f"{desired_config['systems']['length_unit']!r}." 
) for target_key, target in actual_config["targets"].items(): diff --git a/tests/cli/test_train_model.py b/tests/cli/test_train_model.py index 1bd0a61f8..fd66c5552 100644 --- a/tests/cli/test_train_model.py +++ b/tests/cli/test_train_model.py @@ -78,24 +78,24 @@ def test_train_explicit_validation_test( also when the validation and test sets are provided explicitly.""" monkeypatch.chdir(tmp_path) - structures = ase.io.read(DATASET_PATH, ":") + systems = ase.io.read(DATASET_PATH, ":") - ase.io.write("qm9_reduced_100.xyz", structures[:50]) + ase.io.write("qm9_reduced_100.xyz", systems[:50]) options["training_set"] = OmegaConf.create(n_datasets * [options["training_set"]]) if validation_set_file: - ase.io.write("validation.xyz", structures[50:80]) + ase.io.write("validation.xyz", systems[50:80]) options["validation_set"] = options["training_set"][0].copy() - options["validation_set"]["structures"]["read_from"] = "validation.xyz" + options["validation_set"]["systems"]["read_from"] = "validation.xyz" options["validation_set"] = OmegaConf.create( n_datasets * [options["validation_set"]] ) if test_set_file: - ase.io.write("test.xyz", structures[80:]) + ase.io.write("test.xyz", systems[80:]) options["test_set"] = options["training_set"][0].copy() - options["test_set"]["structures"]["read_from"] = "test.xyz" + options["test_set"]["systems"]["read_from"] = "test.xyz" options["test_set"] = OmegaConf.create(n_datasets * [options["test_set"]]) train_model(options) @@ -119,14 +119,14 @@ def test_train_multiple_datasets(monkeypatch, tmp_path, options): also when learning on two different datasets.""" monkeypatch.chdir(tmp_path) - structures = ase.io.read(DATASET_PATH, ":") - structures_2 = ase.io.read(DATASET_PATH_2, ":") + systems = ase.io.read(DATASET_PATH, ":") + systems_2 = ase.io.read(DATASET_PATH_2, ":") - ase.io.write("qm9_reduced_100.xyz", structures[:50]) - ase.io.write("ethanol_reduced_100.xyz", structures_2[:50]) + ase.io.write("qm9_reduced_100.xyz", systems[:50]) + 
ase.io.write("ethanol_reduced_100.xyz", systems_2[:50]) options["training_set"] = OmegaConf.create(2 * [options["training_set"]]) - options["training_set"][1]["structures"]["read_from"] = "ethanol_reduced_100.xyz" + options["training_set"][1]["systems"]["read_from"] = "ethanol_reduced_100.xyz" options["training_set"][1]["targets"]["energy"]["key"] = "energy" options["training_set"][0]["targets"].pop("energy") options["training_set"][0]["targets"]["U0"] = OmegaConf.create({"key": "U0"}) @@ -148,21 +148,21 @@ def test_unit_check_is_performed( """Test that error is raised if units are inconsistent between the datasets.""" monkeypatch.chdir(tmp_path) - structures = ase.io.read(DATASET_PATH, ":") + systems = ase.io.read(DATASET_PATH, ":") - ase.io.write("qm9_reduced_100.xyz", structures[:50]) + ase.io.write("qm9_reduced_100.xyz", systems[:50]) if validation_set_file: - ase.io.write("test.xyz", structures[50:80]) + ase.io.write("test.xyz", systems[50:80]) options["validation_set"] = options["training_set"].copy() - options["validation_set"]["structures"]["read_from"] = "test.xyz" - options["validation_set"]["structures"]["length_unit"] = "foo" + options["validation_set"]["systems"]["read_from"] = "test.xyz" + options["validation_set"]["systems"]["length_unit"] = "foo" if test_set_file: - ase.io.write("validation.xyz", structures[80:]) + ase.io.write("validation.xyz", systems[80:]) options["test_set"] = options["training_set"].copy() - options["test_set"]["structures"]["read_from"] = "validation.xyz" - options["test_set"]["structures"]["length_unit"] = "foo" + options["test_set"]["systems"]["read_from"] = "validation.xyz" + options["test_set"]["systems"]["length_unit"] = "foo" with pytest.raises(SystemExit): train_model(options) @@ -182,20 +182,20 @@ def test_inconsistent_number_of_datasets( i.e one train dataset but two validation datasets. 
Same for the test dataset.""" monkeypatch.chdir(tmp_path) - structures = ase.io.read(DATASET_PATH, ":") + systems = ase.io.read(DATASET_PATH, ":") - ase.io.write("qm9_reduced_100.xyz", structures[:50]) + ase.io.write("qm9_reduced_100.xyz", systems[:50]) if validation_set_file: - ase.io.write("validation.xyz", structures[50:80]) + ase.io.write("validation.xyz", systems[50:80]) options["validation_set"] = options["training_set"].copy() - options["validation_set"]["structures"]["read_from"] = "validation.xyz" + options["validation_set"]["systems"]["read_from"] = "validation.xyz" options["validation_set"] = OmegaConf.create(2 * [options["validation_set"]]) if test_set_file: - ase.io.write("test.xyz", structures[80:]) + ase.io.write("test.xyz", systems[80:]) options["test_set"] = options["training_set"].copy() - options["test_set"]["structures"]["read_from"] = "test.xyz" + options["test_set"]["systems"]["read_from"] = "test.xyz" options["test_set"] = OmegaConf.create(2 * [options["test_set"]]) with pytest.raises(SystemExit): @@ -224,8 +224,8 @@ def test_inconsistencies_within_list_datasets( ref_dataset_conf = OmegaConf.create(2 * [options["training_set"]]) broken_dataset_conf = ref_dataset_conf.copy() - broken_dataset_conf[0]["structures"]["length_unit"] = "foo" - broken_dataset_conf[1]["structures"]["length_unit"] = "bar" + broken_dataset_conf[0]["systems"]["length_unit"] = "foo" + broken_dataset_conf[1]["systems"]["length_unit"] = "bar" options["training_set"] = ref_dataset_conf options["validation_set"] = ref_dataset_conf @@ -262,7 +262,7 @@ def test_continue_different_dataset(options, monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) shutil.copy(RESOURCES_PATH / "ethanol_reduced_100.xyz", "ethanol_reduced_100.xyz") - options["training_set"]["structures"]["read_from"] = "ethanol_reduced_100.xyz" + options["training_set"]["systems"]["read_from"] = "ethanol_reduced_100.xyz" options["training_set"]["targets"]["energy"]["key"] = "energy" train_model(options, 
continue_from=MODEL_PATH) diff --git a/tests/resources/options.yaml b/tests/resources/options.yaml index de256a289..22c0ee568 100644 --- a/tests/resources/options.yaml +++ b/tests/resources/options.yaml @@ -5,7 +5,7 @@ architecture: num_epochs: 1 training_set: - structures: + systems: read_from: "qm9_reduced_100.xyz" targets: energy: diff --git a/tests/utils/data/structures/test_structures_ase.py b/tests/utils/data/structures/test_structures_ase.py index 638e9b9ee..f6ef8e659 100644 --- a/tests/utils/data/structures/test_structures_ase.py +++ b/tests/utils/data/structures/test_structures_ase.py @@ -2,7 +2,7 @@ import ase.io import torch -from metatensor.models.utils.data.readers.structures import read_structures_ase +from metatensor.models.utils.data.readers.systems import read_systems_ase def ase_system() -> ase.Atoms: @@ -16,18 +16,18 @@ def ase_system() -> ase.Atoms: def test_read_ase(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" + filename = "systems.xyz" - structures = ase_system() - ase.io.write(filename, structures) + systems = ase_system() + ase.io.write(filename, systems) - result = read_structures_ase(filename) + result = read_systems_ase(filename) assert isinstance(result, list) assert len(result) == 1 assert isinstance(result[0], torch.ScriptObject) - torch.testing.assert_close(result[0].positions, torch.tensor(structures.positions)) + torch.testing.assert_close(result[0].positions, torch.tensor(systems.positions)) torch.testing.assert_close( result[0].species, torch.tensor([1, 1], dtype=torch.int32) ) diff --git a/tests/utils/data/targets/test_targets_ase.py b/tests/utils/data/targets/test_targets_ase.py index 62054b663..ed7efff42 100644 --- a/tests/utils/data/targets/test_targets_ase.py +++ b/tests/utils/data/targets/test_targets_ase.py @@ -37,14 +37,14 @@ def ase_systems() -> List[ase.Atoms]: def test_read_energy_ase(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" + filename = 
"systems.xyz" - structures = ase_systems() - ase.io.write(filename, structures) + systems = ase_systems() + ase.io.write(filename, systems) results = read_energy_ase(filename=filename, key="true_energy", dtype=torch.float16) - for result, atoms in zip(results, structures): + for result, atoms in zip(results, systems): expected = torch.tensor([[atoms.info["true_energy"]]], dtype=torch.float16) torch.testing.assert_close(result.values, expected) @@ -52,14 +52,14 @@ def test_read_energy_ase(monkeypatch, tmp_path): def test_read_forces_ase(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" + filename = "systems.xyz" - structures = ase_systems() - ase.io.write(filename, structures) + systems = ase_systems() + ase.io.write(filename, systems) results = read_forces_ase(filename=filename, key="forces", dtype=torch.float16) - for result, atoms in zip(results, structures): + for result, atoms in zip(results, systems): expected = -torch.tensor(atoms.get_array("forces"), dtype=torch.float16) expected = expected.reshape(-1, 3, 1) torch.testing.assert_close(result.values, expected) @@ -68,14 +68,14 @@ def test_read_forces_ase(monkeypatch, tmp_path): def test_read_stress_ase(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" + filename = "systems.xyz" - structures = ase_systems() - ase.io.write(filename, structures) + systems = ase_systems() + ase.io.write(filename, systems) results = read_stress_ase(filename=filename, key="stress-3x3", dtype=torch.float16) - for result, atoms in zip(results, structures): + for result, atoms in zip(results, systems): expected = atoms.cell.volume * torch.tensor( atoms.info["stress-3x3"], dtype=torch.float16 ) @@ -87,28 +87,28 @@ def test_no_cell_error(monkeypatch, tmp_path): """Test error raise if cell vectors are zero for reading stress.""" monkeypatch.chdir(tmp_path) - filename = "structures.xyz" + filename = "systems.xyz" - structures = ase_system() - structures.cell = [0.0, 0.0, 0.0] 
+ systems = ase_system() + systems.cell = [0.0, 0.0, 0.0] - ase.io.write(filename, structures) + ase.io.write(filename, systems) - with pytest.raises(ValueError, match="Structure 0 has zero cell vectors."): + with pytest.raises(ValueError, match="system 0 has zero cell vectors."): read_stress_ase(filename=filename, key="stress-3x3") def test_read_virial_ase(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" + filename = "systems.xyz" - structures = ase_systems() - ase.io.write(filename, structures) + systems = ase_systems() + ase.io.write(filename, systems) results = read_virial_ase(filename=filename, key="stress-3x3", dtype=torch.float16) - for result, atoms in zip(results, structures): + for result, atoms in zip(results, systems): expected = -torch.tensor(atoms.info["stress-3x3"], dtype=torch.float16) expected = expected.reshape(-1, 3, 3, 1) torch.testing.assert_close(result.values, expected) @@ -117,15 +117,15 @@ def test_read_virial_ase(monkeypatch, tmp_path): def test_read_virial_warn(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" + filename = "systems.xyz" - structures = ase_system() - ase.io.write(filename, structures) + systems = ase_system() + ase.io.write(filename, systems) with pytest.warns(match="Found 9-long numerical vector"): results = read_virial_ase(filename=filename, key="stress-9") - expected = -torch.tensor(structures.info["stress-9"]) + expected = -torch.tensor(systems.info["stress-9"]) expected = expected.reshape(-1, 3, 3, 1) torch.testing.assert_close(results[0].values, expected) @@ -133,11 +133,11 @@ def test_read_virial_warn(monkeypatch, tmp_path): def test_read_virial_error(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" + filename = "systems.xyz" - structures = ase_system() - structures.info["stress-9"].append(1) - ase.io.write(filename, structures) + systems = ase_system() + systems.info["stress-9"].append(1) + ase.io.write(filename, 
systems) with pytest.raises(ValueError, match="Stress/virial must be a 3 x 3 matrix"): read_virial_ase(filename=filename, key="stress-9") diff --git a/tests/utils/data/test_combine_dataloaders.py b/tests/utils/data/test_combine_dataloaders.py index 6eb6eb8c1..278bdf243 100644 --- a/tests/utils/data/test_combine_dataloaders.py +++ b/tests/utils/data/test_combine_dataloaders.py @@ -7,7 +7,7 @@ from metatensor.models.utils.data import ( collate_fn, combine_dataloaders, - read_structures, + read_systems, read_targets, ) @@ -20,7 +20,7 @@ def test_without_shuffling(): """Tests combining dataloaders without shuffling.""" - structures = read_structures(RESOURCES_PATH / "qm9_reduced_100.xyz") + systems = read_systems(RESOURCES_PATH / "qm9_reduced_100.xyz") conf = { "U0": { @@ -34,11 +34,11 @@ def test_without_shuffling(): } } targets = read_targets(OmegaConf.create(conf)) - dataset = Dataset(structure=structures, U0=targets["U0"]) + dataset = Dataset(system=systems, U0=targets["U0"]) dataloader_qm9 = DataLoader(dataset, batch_size=10, collate_fn=collate_fn) # will yield 10 batches of 10 - structures = read_structures(RESOURCES_PATH / "alchemical_reduced_10.xyz") + systems = read_systems(RESOURCES_PATH / "alchemical_reduced_10.xyz") conf = { "free_energy": { @@ -52,7 +52,7 @@ def test_without_shuffling(): } } targets = read_targets(OmegaConf.create(conf)) - dataset = Dataset(structure=structures, free_energy=targets["free_energy"]) + dataset = Dataset(system=systems, free_energy=targets["free_energy"]) dataloader_alchemical = DataLoader(dataset, batch_size=2, collate_fn=collate_fn) # will yield 5 batches of 2 @@ -73,7 +73,7 @@ def test_with_shuffling(): # WARNING: this test might fail if the random seed is changed, # with a probability of 1/(15 5) = 1/3003 - structures = read_structures(RESOURCES_PATH / "qm9_reduced_100.xyz") + systems = read_systems(RESOURCES_PATH / "qm9_reduced_100.xyz") conf = { "U0": { @@ -87,11 +87,11 @@ def test_with_shuffling(): } } targets = 
read_targets(OmegaConf.create(conf)) - dataset = Dataset(structure=structures, U0=targets["U0"]) + dataset = Dataset(system=systems, U0=targets["U0"]) dataloader_qm9 = DataLoader(dataset, batch_size=10, collate_fn=collate_fn) # will yield 10 batches of 10 - structures = read_structures(RESOURCES_PATH / "alchemical_reduced_10.xyz") + systems = read_systems(RESOURCES_PATH / "alchemical_reduced_10.xyz") conf = { "free_energy": { @@ -105,7 +105,7 @@ def test_with_shuffling(): } } targets = read_targets(OmegaConf.create(conf)) - dataset = Dataset(structure=structures, free_energy=targets["free_energy"]) + dataset = Dataset(system=systems, free_energy=targets["free_energy"]) dataloader_alchemical = DataLoader(dataset, batch_size=2, collate_fn=collate_fn) # will yield 5 batches of 2 diff --git a/tests/utils/data/test_dataset.py b/tests/utils/data/test_dataset.py index 9b8b8493e..0eafb02bd 100644 --- a/tests/utils/data/test_dataset.py +++ b/tests/utils/data/test_dataset.py @@ -7,7 +7,7 @@ from metatensor.models.utils.data import ( collate_fn, get_all_species, - read_structures, + read_systems, read_targets, ) @@ -18,7 +18,7 @@ def test_dataset(): """Tests the readers and the dataset class.""" - structures = read_structures(RESOURCES_PATH / "qm9_reduced_100.xyz") + systems = read_systems(RESOURCES_PATH / "qm9_reduced_100.xyz") filename = str(RESOURCES_PATH / "qm9_reduced_100.xyz") conf = { @@ -33,7 +33,7 @@ def test_dataset(): } } targets = read_targets(OmegaConf.create(conf)) - dataset = Dataset(structure=structures, energy=targets["energy"]) + dataset = Dataset(system=systems, energy=targets["energy"]) dataloader = torch.utils.data.DataLoader( dataset, batch_size=10, collate_fn=collate_fn ) @@ -45,7 +45,7 @@ def test_dataset(): def test_species_list(): """Tests that the species list is correctly computed with get_all_species.""" - structures = read_structures(RESOURCES_PATH / "qm9_reduced_100.xyz") + systems = read_systems(RESOURCES_PATH / "qm9_reduced_100.xyz") conf = { 
"energy": { "quantity": "energy", @@ -57,7 +57,7 @@ def test_species_list(): "virial": False, } } - structures_2 = read_structures(RESOURCES_PATH / "ethanol_reduced_100.xyz") + systems_2 = read_systems(RESOURCES_PATH / "ethanol_reduced_100.xyz") conf_2 = { "energy": { "quantity": "energy", @@ -71,8 +71,8 @@ def test_species_list(): } targets = read_targets(OmegaConf.create(conf)) targets_2 = read_targets(OmegaConf.create(conf_2)) - dataset = Dataset(structure=structures, **targets) - dataset_2 = Dataset(structure=structures_2, **targets_2) + dataset = Dataset(system=systems, **targets) + dataset_2 = Dataset(system=systems_2, **targets_2) assert get_all_species(dataset) == [1, 6, 7, 8] assert get_all_species(dataset_2) == [1, 6, 8] assert get_all_species([dataset, dataset_2]) == [1, 6, 7, 8] diff --git a/tests/utils/data/test_readers.py b/tests/utils/data/test_readers.py index 95386797e..a46e89291 100644 --- a/tests/utils/data/test_readers.py +++ b/tests/utils/data/test_readers.py @@ -15,58 +15,58 @@ read_energy, read_forces, read_stress, - read_structures, + read_systems, read_targets, read_virial, ) @pytest.mark.parametrize("fileformat", (None, ".xyz", ".extxyz")) -def test_read_structures(fileformat, monkeypatch, tmp_path): +def test_read_systems(fileformat, monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" - structures = ase_systems() - ase.io.write(filename, structures) + filename = "systems.xyz" + systems = ase_systems() + ase.io.write(filename, systems) - results = read_structures(filename, fileformat=fileformat, dtype=torch.float16) + results = read_systems(filename, fileformat=fileformat, dtype=torch.float16) assert isinstance(results, list) - assert len(results) == len(structures) - for structure, result in zip(structures, results): + assert len(results) == len(systems) + for system, result in zip(systems, results): assert isinstance(result, torch.ScriptObject) torch.testing.assert_close( - result.positions, 
torch.tensor(structure.positions, dtype=torch.float16) + result.positions, torch.tensor(system.positions, dtype=torch.float16) ) torch.testing.assert_close( result.species, torch.tensor([1, 1], dtype=torch.int32) ) -def test_read_structures_unknown_fileformat(): +def test_read_systems_unknown_fileformat(): with pytest.raises(ValueError, match="fileformat '.bar' is not supported"): - read_structures("foo.bar") + read_systems("foo.bar") @pytest.mark.parametrize("fileformat", (None, ".xyz", ".extxyz")) def test_read_energies(fileformat, monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" - structures = ase_systems() - ase.io.write(filename, structures) + filename = "systems.xyz" + systems = ase_systems() + ase.io.write(filename, systems) results = read_energy( filename, fileformat=fileformat, target_value="true_energy", dtype=torch.float16 ) assert type(results) is list - assert len(results) == len(structures) - for i_structure, result in enumerate(results): + assert len(results) == len(systems) + for i_system, result in enumerate(results): assert result.values.dtype is torch.float16 - assert result.samples.names == ["structure"] - assert result.samples.values == torch.tensor([[i_structure]]) + assert result.samples.names == ["system"] + assert result.samples.values == torch.tensor([[i_system]]) assert result.properties == Labels.single() @@ -74,21 +74,21 @@ def test_read_energies(fileformat, monkeypatch, tmp_path): def test_read_forces(fileformat, monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" - structures = ase_systems() - ase.io.write(filename, structures) + filename = "systems.xyz" + systems = ase_systems() + ase.io.write(filename, systems) results = read_forces( filename, fileformat=fileformat, target_value="forces", dtype=torch.float16 ) assert type(results) is list - assert len(results) == len(structures) - for i_structure, result in enumerate(results): + assert len(results) == len(systems) + for 
i_system, result in enumerate(results): assert result.values.dtype is torch.float16 - assert result.samples.names == ["sample", "structure", "atom"] + assert result.samples.names == ["sample", "system", "atom"] assert torch.all(result.samples["sample"] == torch.tensor(0)) - assert torch.all(result.samples["structure"] == torch.tensor(i_structure)) + assert torch.all(result.samples["system"] == torch.tensor(i_system)) assert result.components == [Labels(["xyz"], torch.arange(3).reshape(-1, 1))] assert result.properties == Labels.single() @@ -98,16 +98,16 @@ def test_read_forces(fileformat, monkeypatch, tmp_path): def test_read_stress_virial(reader, fileformat, monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" - structures = ase_systems() - ase.io.write(filename, structures) + filename = "systems.xyz" + systems = ase_systems() + ase.io.write(filename, systems) results = reader( filename, fileformat=fileformat, target_value="stress-3x3", dtype=torch.float16 ) assert type(results) is list - assert len(results) == len(structures) + assert len(results) == len(systems) components = [ Labels(["xyz_1"], torch.arange(3).reshape(-1, 1)), Labels(["xyz_2"], torch.arange(3).reshape(-1, 1)), @@ -127,7 +127,7 @@ def test_reader_unknown_fileformat(reader): STRESS_VIRIAL_DICT = { - "read_from": "structures.xyz", + "read_from": "systems.xyz", "file_format": ".xyz", "key": "stress-3x3", } @@ -140,9 +140,9 @@ def test_reader_unknown_fileformat(reader): def test_read_targets(stress_dict, virial_dict, monkeypatch, tmp_path, caplog): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" - structures = ase_systems() - ase.io.write(filename, structures) + filename = "systems.xyz" + systems = ase_systems() + ase.io.write(filename, systems) energy_section = { "quantity": "energy", @@ -178,12 +178,12 @@ def test_read_targets(stress_dict, virial_dict, monkeypatch, tmp_path, caplog): result_block = target.block() assert result_block.values.dtype is 
torch.float16 - assert result_block.samples.names == ["structure"] + assert result_block.samples.names == ["system"] assert result_block.properties == Labels.single() pos_grad = result_block.gradient("positions") assert pos_grad.values.dtype is torch.float16 - assert pos_grad.samples.names == ["sample", "structure", "atom"] + assert pos_grad.samples.names == ["sample", "system", "atom"] assert pos_grad.components == [ Labels(["xyz"], torch.arange(3).reshape(-1, 1)) ] @@ -207,15 +207,15 @@ def test_read_targets(stress_dict, virial_dict, monkeypatch, tmp_path, caplog): def test_read_targets_warnings(stress_dict, virial_dict, monkeypatch, tmp_path, caplog): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" - structures = ase_system() + filename = "systems.xyz" + systems = ase_system() # Delete gradient sections - structures.info.pop("stress-3x3") - structures.info.pop("stress-9") - structures.arrays.pop("forces") + systems.info.pop("stress-3x3") + systems.info.pop("stress-9") + systems.arrays.pop("forces") - ase.io.write(filename, structures) + ase.io.write(filename, systems) energy_section = { "quantity": "energy", @@ -230,7 +230,7 @@ def test_read_targets_warnings(stress_dict, virial_dict, monkeypatch, tmp_path, conf = {"energy": energy_section} caplog.set_level(logging.WARNING) - read_targets(OmegaConf.create(conf)) # , slice_samples_by="structure") + read_targets(OmegaConf.create(conf)) # , slice_samples_by="system") assert any(["Forces not found" in rec.message for rec in caplog.records]) @@ -243,9 +243,9 @@ def test_read_targets_warnings(stress_dict, virial_dict, monkeypatch, tmp_path, def test_read_targets_error(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - filename = "structures.xyz" - structures = ase_system() - ase.io.write(filename, structures) + filename = "systems.xyz" + systems = ase_system() + ase.io.write(filename, systems) energy_section = { "quantity": "energy", diff --git a/tests/utils/data/test_target_writers.py 
b/tests/utils/data/test_target_writers.py index ff4babc99..169af51d5 100644 --- a/tests/utils/data/test_target_writers.py +++ b/tests/utils/data/test_target_writers.py @@ -10,11 +10,11 @@ from metatensor.models.utils.data.writers import write_predictions, write_xyz -def structures_predictions(cell: torch.tensor = None) -> List[System]: +def systems_predictions(cell: torch.tensor = None) -> List[System]: if cell is None: cell = torch.zeros(3, 3) - structures = systems_to_torch( + systems = systems_to_torch( 2 * [ System( @@ -26,32 +26,32 @@ def structures_predictions(cell: torch.tensor = None) -> List[System]: ) # Create a mock TensorMap for predictions - n_structures = len(structures) + n_systems = len(systems) values = torch.tensor([[1.0], [2.0]]) block = TensorBlock( values=values.reshape(-1, 1), - samples=Labels(["structure"], torch.arange(n_structures).reshape(-1, 1)), + samples=Labels(["system"], torch.arange(n_systems).reshape(-1, 1)), components=[], properties=Labels(["energy"], torch.tensor([(0,)])), ) predictions = {"energy": TensorMap(Labels.single(), [block])} - return structures, predictions + return systems, predictions def test_write_xyz(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - structures, predictions = structures_predictions() + systems, predictions = systems_predictions() filename = "test_output.xyz" - write_xyz(filename, predictions, structures) + write_xyz(filename, predictions, systems) # Read the file and verify its contents frames = ase.io.read(filename, index=":") - assert len(frames) == len(structures) + assert len(frames) == len(systems) for i, atoms in enumerate(frames): assert atoms.info["energy"] == float(predictions["energy"].block().values[i, 0]) assert all(atoms.pbc == 3 * [False]) @@ -61,11 +61,11 @@ def test_write_xyz_cell(monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) cell = torch.ones(3, 3) - structures, predictions = structures_predictions(cell=cell) + systems, predictions = systems_predictions(cell=cell) 
filename = "test_output.xyz" - write_xyz(filename, predictions, structures) + write_xyz(filename, predictions, systems) # Read the file and verify its contents frames = ase.io.read(filename, index=":") @@ -78,18 +78,18 @@ def test_write_xyz_cell(monkeypatch, tmp_path): def test_write_predictions(fileformat, monkeypatch, tmp_path): monkeypatch.chdir(tmp_path) - structures, predictions = structures_predictions() + systems, predictions = systems_predictions() filename = "test_output.xyz" - write_predictions(filename, predictions, structures, fileformat=fileformat) + write_predictions(filename, predictions, systems, fileformat=fileformat) frames = ase.io.read(filename, index=":") - assert len(frames) == len(structures) + assert len(frames) == len(systems) for i, frame in enumerate(frames): assert frame.info["energy"] == float(predictions["energy"].block().values[i, 0]) def test_write_predictions_unknown_fileformat(): with pytest.raises(ValueError, match="fileformat '.bar' is not supported"): - write_predictions("foo.bar", predictions=None, structures=None) + write_predictions("foo.bar", predictions=None, systems=None) diff --git a/tests/utils/test_compute_loss.py b/tests/utils/test_compute_loss.py index b7143a0b8..d4cde9fb8 100644 --- a/tests/utils/test_compute_loss.py +++ b/tests/utils/test_compute_loss.py @@ -6,7 +6,7 @@ from metatensor.models.experimental import soap_bpnn from metatensor.models.utils.compute_loss import compute_model_loss -from metatensor.models.utils.data import read_structures +from metatensor.models.utils.data import read_systems from metatensor.models.utils.loss import TensorMapDictLoss @@ -36,7 +36,7 @@ def test_compute_model_loss(): model = soap_bpnn.Model(capabilities) # model = torch.jit.script(model) # jit the model for good measure - structures = read_structures(RESOURCES_PATH / "alchemical_reduced_10.xyz")[:2] + systems = read_systems(RESOURCES_PATH / "alchemical_reduced_10.xyz")[:2] gradient_samples = Labels( names=["sample", "atom"], @@ 
-44,13 +44,11 @@ def test_compute_model_loss(): [ torch.concatenate( [ - torch.tensor([i] * len(structure)) - for i, structure in enumerate(structures) + torch.tensor([i] * len(system)) + for i, system in enumerate(systems) ] ), - torch.concatenate( - [torch.arange(len(structure)) for structure in structures] - ), + torch.concatenate([torch.arange(len(system)) for system in systems]), ], dim=1, ), @@ -64,8 +62,8 @@ def test_compute_model_loss(): ] block = TensorBlock( - values=torch.tensor([[0.0] * len(structures)]).T, - samples=Labels.range("structure", len(structures)), + values=torch.tensor([[0.0] * len(systems)]).T, + samples=Labels.range("system", len(systems)), components=[], properties=Labels.single(), ) @@ -76,8 +74,8 @@ def test_compute_model_loss(): values=torch.tensor( [ [[1.0], [1.0], [1.0]] - for structure in structures - for _ in range(len(structure.positions)) + for system in systems + for _ in range(len(system.positions)) ] ), samples=gradient_samples, @@ -99,6 +97,6 @@ def test_compute_model_loss(): compute_model_loss( loss_fn, model, - structures, + systems, targets, ) diff --git a/tests/utils/test_model_io.py b/tests/utils/test_model_io.py index 80dc50ebf..fb8545d69 100644 --- a/tests/utils/test_model_io.py +++ b/tests/utils/test_model_io.py @@ -7,7 +7,7 @@ from metatensor.torch.atomistic import ModelCapabilities, ModelOutput from metatensor.models.experimental import soap_bpnn -from metatensor.models.utils.data import read_structures +from metatensor.models.utils.data import read_systems from metatensor.models.utils.export import is_exported from metatensor.models.utils.model_io import ( load_checkpoint, @@ -35,10 +35,10 @@ def test_save_load_checkpoint(monkeypatch, tmp_path): ) model = soap_bpnn.Model(capabilities) - structures = read_structures(RESOURCES_PATH / "qm9_reduced_100.xyz") + systems = read_systems(RESOURCES_PATH / "qm9_reduced_100.xyz") output_before_save = model( - rascaline.torch.systems_to_torch(structures), + 
rascaline.torch.systems_to_torch(systems), {"energy": model.capabilities.outputs["energy"]}, ) @@ -46,7 +46,7 @@ def test_save_load_checkpoint(monkeypatch, tmp_path): loaded_model = load_checkpoint("test_model.pt") output_after_load = loaded_model( - rascaline.torch.systems_to_torch(structures), + rascaline.torch.systems_to_torch(systems), {"energy": model.capabilities.outputs["energy"]}, ) diff --git a/tests/utils/test_neighbor_list.py b/tests/utils/test_neighbor_list.py index f095d02b8..afd55910c 100644 --- a/tests/utils/test_neighbor_list.py +++ b/tests/utils/test_neighbor_list.py @@ -2,7 +2,7 @@ from metatensor.torch.atomistic import NeighborsListOptions -from metatensor.models.utils.data.readers.structures import read_structures_ase +from metatensor.models.utils.data.readers.systems import read_systems_ase from metatensor.models.utils.neighbors_lists import get_system_with_neighbors_lists @@ -11,7 +11,7 @@ def test_attach_neighbor_lists(): filename = RESOURCES_PATH / "qm9_reduced_100.xyz" - structures = read_structures_ase(filename) + systems = read_systems_ase(filename) requested_neighbor_lists = [ NeighborsListOptions(model_cutoff=4.0, full_list=True), @@ -19,9 +19,7 @@ def test_attach_neighbor_lists(): NeighborsListOptions(model_cutoff=6.0, full_list=True), ] - new_system = get_system_with_neighbors_lists( - structures[0], requested_neighbor_lists - ) + new_system = get_system_with_neighbors_lists(systems[0], requested_neighbor_lists) assert requested_neighbor_lists[0] in new_system.known_neighbors_lists() assert requested_neighbor_lists[1] in new_system.known_neighbors_lists() diff --git a/tests/utils/test_omegaconf.py b/tests/utils/test_omegaconf.py index 0f4811e77..a1f95c906 100644 --- a/tests/utils/test_omegaconf.py +++ b/tests/utils/test_omegaconf.py @@ -22,7 +22,7 @@ def test_expand_dataset_config(n_datasets): file_name = "foo.xyz" file_format = ".xyz" - structure_section = {"read_from": file_name, "length_unit": "angstrom"} + system_section = 
{"read_from": file_name, "length_unit": "angstrom"} target_section = { "quantity": "energy", @@ -31,7 +31,7 @@ def test_expand_dataset_config(n_datasets): } conf = { - "structures": structure_section, + "systems": system_section, "targets": {"energy": target_section, "my_target": target_section}, } @@ -43,9 +43,9 @@ def test_expand_dataset_config(n_datasets): assert len(conf_expanded_list) == n_datasets for conf_expanded in conf_expanded_list: - assert conf_expanded["structures"]["read_from"] == file_name - assert conf_expanded["structures"]["file_format"] == file_format - assert conf_expanded["structures"]["length_unit"] == "angstrom" + assert conf_expanded["systems"]["read_from"] == file_name + assert conf_expanded["systems"]["file_format"] == file_format + assert conf_expanded["systems"]["length_unit"] == "angstrom" targets_conf = conf_expanded["targets"] assert len(targets_conf) == 2 @@ -75,14 +75,14 @@ def test_expand_dataset_config(n_datasets): def test_expand_dataset_config_not_energy(): file_name = "foo.xyz" - structure_section = {"read_from": file_name, "unit": "angstrom"} + system_section = {"read_from": file_name, "unit": "angstrom"} target_section = { "quantity": "my_dipole_moment", } conf = { - "structures": structure_section, + "systems": system_section, "targets": {"dipole_moment": target_section}, } @@ -106,9 +106,9 @@ def test_expand_dataset_config_min(): conf_expanded_list = expand_dataset_config(file_name) conf_expanded = conf_expanded_list[0] - assert conf_expanded["structures"]["read_from"] == file_name - assert conf_expanded["structures"]["file_format"] == file_format - assert conf_expanded["structures"]["length_unit"] is None + assert conf_expanded["systems"]["read_from"] == file_name + assert conf_expanded["systems"]["file_format"] == file_format + assert conf_expanded["systems"]["length_unit"] is None targets_conf = conf_expanded["targets"] assert targets_conf["energy"]["quantity"] == "energy" @@ -130,7 +130,7 @@ def 
test_expand_dataset_config_error(): file_name = "foo.xyz" conf = { - "structures": file_name, + "systems": file_name, "targets": { "energy": { "virial": file_name, @@ -147,7 +147,7 @@ def test_expand_dataset_config_error(): def test_expand_dataset_gradient(): conf = { - "structures": "foo.xyz", + "systems": "foo.xyz", "targets": { "my_energy": { "forces": "data.txt", @@ -169,7 +169,7 @@ def test_expand_dataset_gradient(): def test_check_units(): file_name = "foo.xyz" - structure_section = {"read_from": file_name, "length_unit": "angstrom"} + system_section = {"read_from": file_name, "length_unit": "angstrom"} target_section = { "quantity": "energy", @@ -186,11 +186,11 @@ def test_check_units(): } conf = { - "structures": structure_section, + "systems": system_section, "targets": {"energy": target_section, "my_target": mytarget_section}, } - structure_section1 = {"read_from": file_name, "length_unit": "angstrom1"} + system_section1 = {"read_from": file_name, "length_unit": "angstrom1"} target_section1 = { "quantity": "energy", @@ -207,19 +207,19 @@ def test_check_units(): } conf1 = { - "structures": structure_section1, + "systems": system_section1, "targets": {"energy": target_section, "my_target": mytarget_section}, } conf0 = { - "structures": structure_section, + "systems": system_section, "targets": {"energy": target_section, "my_target0": mytarget_section}, } conf2 = { - "structures": structure_section, + "systems": system_section, "targets": {"energy": target_section1, "my_target": mytarget_section}, } conf3 = { - "structures": structure_section, + "systems": system_section, "targets": {"energy": target_section, "my_target": mytarget_section1}, } @@ -271,12 +271,12 @@ def test_check_units(): def test_missing_targets_section(): - conf = {"structures": "foo.xyz"} + conf = {"systems": "foo.xyz"} conf_expanded_list = expand_dataset_config(OmegaConf.create(conf)) conf_expanded = conf_expanded_list[0] - assert conf_expanded["structures"]["read_from"] == "foo.xyz" - 
assert conf_expanded["structures"]["file_format"] == ".xyz" + assert conf_expanded["systems"]["read_from"] == "foo.xyz" + assert conf_expanded["systems"]["file_format"] == ".xyz" def test_missing_strcutures_section(): @@ -292,7 +292,7 @@ def test_missing_strcutures_section(): def list_conf(): file_name = "foo.xyz" - structure_section = {"read_from": file_name, "length_unit": "angstrom"} + system_section = {"read_from": file_name, "length_unit": "angstrom"} target_section = { "quantity": "energy", @@ -302,7 +302,7 @@ def list_conf(): } conf = { - "structures": structure_section, + "systems": system_section, "targets": {"energy": target_section, "my_target": target_section}, } @@ -310,8 +310,8 @@ def list_conf(): def test_check_options_list_length_unit(list_conf): - list_conf[1]["structures"]["length_unit"] = "foo" - list_conf[2]["structures"]["length_unit"] = "bar" + list_conf[1]["systems"]["length_unit"] = "foo" + list_conf[2]["systems"]["length_unit"] = "bar" match = ( "`length_unit`s are inconsistent between one of the dataset options. 
" diff --git a/tests/utils/test_output_gradient.py b/tests/utils/test_output_gradient.py index 8794802e6..50f445319 100644 --- a/tests/utils/test_output_gradient.py +++ b/tests/utils/test_output_gradient.py @@ -7,7 +7,7 @@ from metatensor.torch.atomistic import ModelCapabilities, ModelOutput from metatensor.models.experimental import soap_bpnn -from metatensor.models.utils.data import read_structures +from metatensor.models.utils.data import read_systems from metatensor.models.utils.output_gradient import compute_gradient @@ -30,26 +30,22 @@ def test_forces(is_training): ) model = soap_bpnn.Model(capabilities) - structures = read_structures(RESOURCES_PATH / "qm9_reduced_100.xyz")[:5] - structures = rascaline.torch.systems_to_torch( - structures, positions_requires_grad=True - ) - output = model(structures, {"energy": model.capabilities.outputs["energy"]}) + systems = read_systems(RESOURCES_PATH / "qm9_reduced_100.xyz")[:5] + systems = rascaline.torch.systems_to_torch(systems, positions_requires_grad=True) + output = model(systems, {"energy": model.capabilities.outputs["energy"]}) position_gradients = compute_gradient( output["energy"].block().values, - [structure.positions for structure in structures], + [system.positions for system in systems], is_training=is_training, ) forces = [-position_gradient for position_gradient in position_gradients] jitted_model = torch.jit.script(model) - structures = rascaline.torch.systems_to_torch( - structures, positions_requires_grad=True - ) - output = jitted_model(structures, {"energy": model.capabilities.outputs["energy"]}) + systems = rascaline.torch.systems_to_torch(systems, positions_requires_grad=True) + output = jitted_model(systems, {"energy": model.capabilities.outputs["energy"]}) jitted_position_gradients = compute_gradient( output["energy"].block().values, - [structure.positions for structure in structures], + [system.positions for system in systems], is_training=is_training, ) jitted_forces = [ @@ -76,13 +72,13 @@ def 
test_virial(is_training): ) model = soap_bpnn.Model(capabilities) - structures = read_structures(RESOURCES_PATH / "alchemical_reduced_10.xyz")[:2] + systems = read_systems(RESOURCES_PATH / "alchemical_reduced_10.xyz")[:2] strains = [ torch.eye( 3, requires_grad=True, dtype=system.cell.dtype, device=system.cell.device ) - for system in structures + for system in systems ] systems = [ metatensor.torch.atomistic.System( @@ -90,7 +86,7 @@ def test_virial(is_training): cell=system.cell @ strain, species=system.species, ) - for system, strain in zip(structures, strains) + for system, strain in zip(systems, strains) ] output = model(systems, {"energy": model.capabilities.outputs["energy"]}) @@ -107,7 +103,7 @@ def test_virial(is_training): torch.eye( 3, requires_grad=True, dtype=system.cell.dtype, device=system.cell.device ) - for system in structures + for system in systems ] systems = [ metatensor.torch.atomistic.System( @@ -115,7 +111,7 @@ def test_virial(is_training): cell=system.cell @ strain, species=system.species, ) - for system, strain in zip(structures, strains) + for system, strain in zip(systems, strains) ] output = jitted_model(systems, {"energy": model.capabilities.outputs["energy"]}) @@ -146,7 +142,7 @@ def test_both(is_training): ) model = soap_bpnn.Model(capabilities) - structures = read_structures(RESOURCES_PATH / "alchemical_reduced_10.xyz")[:2] + systems = read_systems(RESOURCES_PATH / "alchemical_reduced_10.xyz")[:2] # Here we re-create strains and systems, otherwise torch # complains that the graph has already beeen freed in the last grad call @@ -154,7 +150,7 @@ def test_both(is_training): torch.eye( 3, requires_grad=True, dtype=system.cell.dtype, device=system.cell.device ) - for system in structures + for system in systems ] systems = [ metatensor.torch.atomistic.System( @@ -162,7 +158,7 @@ def test_both(is_training): cell=system.cell @ strain, species=system.species, ) - for system, strain in zip(structures, strains) + for system, strain in 
zip(systems, strains) ] output = model(systems, {"energy": model.capabilities.outputs["energy"]}) @@ -177,7 +173,7 @@ def test_both(is_training): torch.eye( 3, requires_grad=True, dtype=system.cell.dtype, device=system.cell.device ) - for system in structures + for system in systems ] systems = [ metatensor.torch.atomistic.System( @@ -185,7 +181,7 @@ def test_both(is_training): cell=system.cell @ strain, species=system.species, ) - for system, strain in zip(structures, strains) + for system, strain in zip(systems, strains) ] jitted_model = torch.jit.script(model)