From aa30355ece9ccd066495300c47be38f692122703 Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Mon, 15 Jul 2024 15:31:07 +0200 Subject: [PATCH 01/15] Experimental kokkos interface --- cmake/Modules/Packages/KOKKOS.cmake | 5 + cmake/Modules/Packages/ML-METATENSOR.cmake | 16 +- .../PACKAGES/metatensor/in.kokkos.metatensor | 28 + src/KOKKOS/metatensor_system_kokkos.cpp | 390 +++++++++++ src/KOKKOS/metatensor_system_kokkos.h | 141 ++++ src/KOKKOS/pair_metatensor_kokkos.cpp | 637 ++++++++++++++++++ src/KOKKOS/pair_metatensor_kokkos.h | 59 ++ src/ML-METATENSOR/pair_metatensor.cpp | 2 +- 8 files changed, 1269 insertions(+), 9 deletions(-) create mode 100644 examples/PACKAGES/metatensor/in.kokkos.metatensor create mode 100644 src/KOKKOS/metatensor_system_kokkos.cpp create mode 100644 src/KOKKOS/metatensor_system_kokkos.h create mode 100644 src/KOKKOS/pair_metatensor_kokkos.cpp create mode 100644 src/KOKKOS/pair_metatensor_kokkos.h diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 3776d18a3e1..d462ce26385 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -182,6 +182,11 @@ if(PKG_ML-IAP) endif() endif() +if(PKG_ML-METATENSOR) + list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/metatensor_system_kokkos.cpp) + +endif() + if(PKG_PHONON) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/dynamical_matrix_kokkos.cpp) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/third_order_kokkos.cpp) diff --git a/cmake/Modules/Packages/ML-METATENSOR.cmake b/cmake/Modules/Packages/ML-METATENSOR.cmake index ca021cc7051..3aa7057b5dd 100644 --- a/cmake/Modules/Packages/ML-METATENSOR.cmake +++ b/cmake/Modules/Packages/ML-METATENSOR.cmake @@ -4,14 +4,14 @@ if(CMAKE_CXX_STANDARD LESS 17) be set to at least C++17") endif() -if (BUILD_OMP AND APPLE) - message(FATAL_ERROR - "Can not enable both BUILD_OMP and PGK_ML-METATENSOR on Apple systems, " - "since this results in two different versions 
of libiomp5.dylib (one " - "from the system and one from Torch) being linked to the final " - "executable, which then segfaults" - ) -endif() +# if (BUILD_OMP AND APPLE) +# message(FATAL_ERROR +# "Can not enable both BUILD_OMP and PGK_ML-METATENSOR on Apple systems, " +# "since this results in two different versions of libiomp5.dylib (one " +# "from the system and one from Torch) being linked to the final " +# "executable, which then segfaults" +# ) +# endif() # Bring the `torch` target in scope to allow evaluation # of cmake generator expression from `metatensor_torch` diff --git a/examples/PACKAGES/metatensor/in.kokkos.metatensor b/examples/PACKAGES/metatensor/in.kokkos.metatensor new file mode 100644 index 00000000000..78075759c40 --- /dev/null +++ b/examples/PACKAGES/metatensor/in.kokkos.metatensor @@ -0,0 +1,28 @@ +units metal +boundary p p p + +atom_style atomic/kk +lattice fcc 3.6 +region box block 0 2 0 2 0 2 +create_box 1 box +create_atoms 1 box + +# labelmap atom 1 Ni # lammps-kokkos doesn't like this +mass 1 58.693 + +velocity all create 123 42 + +run_style verlet/kk + +pair_style metatensor/kk nickel-lj.pt device cuda +pair_coeff * * 28 + +timestep 0.001 +fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(100 * dt) + +thermo 1 +thermo_style custom step temp pe etotal press vol + +# dump 1 all atom 10 dump.metatensor + +run 100 diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp new file mode 100644 index 00000000000..e4fc076e04f --- /dev/null +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -0,0 +1,390 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS Development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. 
Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Guillaume Fraux +------------------------------------------------------------------------- */ +#include "metatensor_system_kokkos.h" + +#include "atom.h" +#include "domain.h" +#include "error.h" +#include "neighbor.h" + +#include "neigh_list.h" +#include "neigh_request.h" + +#include "kokkos.h" +#include "atom_kokkos.h" + +#ifndef KOKKOS_ENABLE_CUDA +namespace Kokkos { +class Cuda {}; +} // namespace Kokkos +#endif // KOKKOS_ENABLE_CUDA + +using namespace LAMMPS_NS; + +/* ---------------------------------------------------------------------- */ + +template +MetatensorSystemAdaptorKokkos::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Pair* requestor, MetatensorSystemOptionsKokkos options): + Pointers(lmp), + list_(nullptr), + options_(std::move(options)), + caches_(), + atomic_types_(torch::zeros({0}, torch::TensorOptions().dtype(torch::kInt32))) +{ + torch::Device device = torch::kCPU; + if (std::is_same::value) { + device = torch::kCUDA; + } else { + device = torch::kCPU; + } + + // We ask LAMMPS for a full neighbor lists because we need to know about + // ALL pairs, even if options->full_list() is false. We will then filter + // the pairs to only include each pair once where needed. 
+ auto request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); + request->set_id(0); + request->set_cutoff(options_.interaction_range); + + this->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(device).requires_grad(true)); +} + +template +MetatensorSystemAdaptorKokkos::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Compute* requestor, MetatensorSystemOptionsKokkos options): + Pointers(lmp), + list_(nullptr), + options_(std::move(options)), + caches_(), + atomic_types_(torch::zeros({0}, torch::TensorOptions().dtype(torch::kInt32))) +{ + torch::Device device = torch::kCPU; + if (std::is_same::value) { + device = torch::kCUDA; + } else { + device = torch::kCPU; + } + + auto request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); + request->set_id(0); + request->set_cutoff(options_.interaction_range); + + this->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(device).requires_grad(true)); +} + +template +MetatensorSystemAdaptorKokkos::~MetatensorSystemAdaptorKokkos() { + +} + +template +void MetatensorSystemAdaptorKokkos::init_list(int id, NeighList* ptr) { + assert(id == 0); + list_ = ptr; +} + +template +void MetatensorSystemAdaptorKokkos::add_nl_request(double cutoff, metatensor_torch::NeighborListOptions request) { + if (cutoff > options_.interaction_range) { + error->all(FLERR, + "Invalid metatensor model: one of the requested neighbor lists " + "has a cutoff ({}) larger than the model interaction range ({})", + cutoff, options_.interaction_range + ); + } else if (cutoff < 0 || !std::isfinite(cutoff)) { + error->all(FLERR, + "model requested an invalid cutoff for neighbors list: {} " + "(cutoff in model units is {})", + cutoff, request->cutoff() + ); + } + + caches_.push_back({ + cutoff, + request, + /*known_samples = */ {}, + /*samples = */ {}, + /*distances_f64 = */ {}, + /*distances_f32 = */ {}, + }); +} + + +template +void 
MetatensorSystemAdaptorKokkos::setup_neighbors(metatensor_torch::System& system) { + // std::cout << "MetatensorSystemAdaptorKokkos::setup_neighbors" << std::endl; + auto dtype = system->positions().scalar_type(); + auto device = system->positions().device(); + + auto positions_kokkos = this->atomKK->k_x. template view(); + auto total_n_atoms = atomKK->nlocal + atomKK->nghost; + + auto cell_inv_tensor = system->cell().inverse().t().to(device).to(torch::kFloat64); + // it might be a good idea to have this as float32 if the model is using float32 + // to speed up the computation, especially on GPU + + + /*-------------- whatever, this will be done on CPU for now ------------------------*/ + + // Collect the local atom id of all local & ghosts atoms, mapping ghosts + // atoms which are periodic images of local atoms back to the local atoms. + // + // Metatensor expects pairs corresponding to periodic atoms to be between + // the main atoms, but using the actual distance vector between the atom and + // the ghost. + original_atom_id_.clear(); + original_atom_id_.reserve(total_n_atoms); + + // identify all local atom by their LAMMPS atom tag. + local_atoms_tags_.clear(); + for (int i=0; inlocal; i++) { + original_atom_id_.emplace_back(i); + local_atoms_tags_.emplace(atom->tag[i], i); + } + + // now loop over ghosts & map them back to the main cell if needed + ghost_atoms_tags_.clear(); + for (int i=atom->nlocal; itag[i]; + auto it = local_atoms_tags_.find(tag); + if (it != local_atoms_tags_.end()) { + // this is the periodic image of an atom already owned by this domain + original_atom_id_.emplace_back(it->second); + } else { + // this can either be a periodic image of an atom owned by another + // domain, or directly an atom from another domain. 
Since we can not + // really distinguish between these, we take the first atom as the + // "main" one and remap all atoms with the same tag to the first one + auto it = ghost_atoms_tags_.find(tag); + if (it != ghost_atoms_tags_.end()) { + // we already found this atom elsewhere in the system + original_atom_id_.emplace_back(it->second); + } else { + // this is the first time we are seeing this atom + original_atom_id_.emplace_back(i); + ghost_atoms_tags_.emplace(tag, i); + } + } + } + /*----------- end of whatever, this will be done on CPU for now --------------*/ + + auto original_atom_id_tensor = torch::from_blob( + original_atom_id_.data(), + {total_n_atoms}, + torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU) + ); + original_atom_id_tensor = original_atom_id_tensor.to(device); // RIP + + // Accumulate total number of pairs + int total_number_of_pairs = 0; + for (int ii=0; ii<(list_->inum + list_->gnum); ii++) { + total_number_of_pairs += list_->numneigh[ii]; + } + std::vector centers(total_number_of_pairs); + std::vector neighbors(total_number_of_pairs); + + // Fill the centers and neighbors arrays with the original atom ids + int pair_index = 0; + for (int ii=0; ii<(list_->inum + list_->gnum); ii++) { + auto atom_i = list_->ilist[ii]; + auto neighbors_ii = list_->firstneigh[ii]; + for (int jj=0; jjnumneigh[ii]; jj++) { + centers[pair_index] = atom_i; + neighbors[pair_index] = neighbors_ii[jj]; + pair_index++; + } + } + + // Create torch tensors for the centers and neighbors arrays + auto centers_tensor = torch::from_blob( + centers.data(), + {total_number_of_pairs}, + torch::TensorOptions().dtype(torch::kInt32).device(device) + ); + auto neighbors_tensor = torch::from_blob( + neighbors.data(), + {total_number_of_pairs}, + torch::TensorOptions().dtype(torch::kInt32).device(device) + ); + + // change centers and neighbors to the original atom ids + auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor); + auto 
neighbors_tensor_original_id = original_atom_id_tensor.index_select(0, neighbors_tensor); + + // create torch tensor with the positions (TEMPORARY, TODO: change) + auto positions_tensor = torch::from_blob( + positions_kokkos.data(), + {total_n_atoms, 3}, + torch::TensorOptions().dtype(torch::kFloat64).device(device) + ); + + for (auto& cache: caches_) { + // half list mask, if necessary (TODO: change names! This could modify the tensors outside the loop if more than one NL!) + auto full_list = cache.options->full_list(); + if (!full_list) { + auto half_list_mask = centers_tensor_original_id <= neighbors_tensor_original_id; + centers_tensor = centers_tensor.masked_select(half_list_mask); + neighbors_tensor = neighbors_tensor.masked_select(half_list_mask); + centers_tensor_original_id = centers_tensor_original_id.masked_select(half_list_mask); + neighbors_tensor_original_id = neighbors_tensor_original_id.masked_select(half_list_mask); + } + + // distance mask + auto interatomic_vectors = positions_tensor.index_select(0, neighbors_tensor) - positions_tensor.index_select(0, centers_tensor); + auto distance_mask = torch::sum(interatomic_vectors.pow(2), 1) < cache.cutoff*cache.cutoff; + + // index everything with the mask + auto centers_tensor_original_id_filtered = centers_tensor_original_id.masked_select(distance_mask); + auto neighbors_tensor_original_id_filtered = neighbors_tensor_original_id.masked_select(distance_mask); + auto interatomic_vectors_filtered = interatomic_vectors.index({distance_mask, torch::indexing::Slice()}); + + // find filtered interatomic vectors using the original atoms + auto interatomic_vectors_original_filtered = positions_tensor.index_select(0, neighbors_tensor_original_id_filtered) - positions_tensor.index_select(0, centers_tensor_original_id_filtered); + + // cell shifts + auto pair_shifts = interatomic_vectors_filtered - interatomic_vectors_original_filtered; + auto cell_shifts = pair_shifts.matmul(cell_inv_tensor); + cell_shifts = 
torch::round(cell_shifts).to(torch::kInt32); + + if (!full_list) { + auto half_list_cell_mask = centers_tensor_original_id_filtered == neighbors_tensor_original_id_filtered; + auto negative_half_space_mask = torch::sum(cell_shifts, 1) < 0; + // reproduce this mask with torch: + // if ((shift[0] + shift[1] + shift[2] == 0) && (shift[2] < 0 || (shift[2] == 0 && shift[1] < 0))) + auto edge_mask = ( + torch::sum(cell_shifts, 1) == 0 & ( + cell_shifts.index({torch::indexing::Slice(), 2}) < 0 | ( + cell_shifts.index({torch::indexing::Slice(), 2}) == 0 & + cell_shifts.index({torch::indexing::Slice(), 1}) < 0 + ) + ) + ); + auto final_mask = torch::logical_not(half_list_cell_mask & (negative_half_space_mask | edge_mask)); + centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.masked_select(final_mask); + neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.masked_select(final_mask); + interatomic_vectors_filtered = interatomic_vectors_filtered.index({final_mask, torch::indexing::Slice()}); + cell_shifts = cell_shifts.index({final_mask, torch::indexing::Slice()}); + } + + centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.unsqueeze(-1); + neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.unsqueeze(-1); + auto samples_values = torch::concatenate({centers_tensor_original_id_filtered, neighbors_tensor_original_id_filtered, cell_shifts}, 1); + + auto [samples_values_unique, samples_inverse, _] = torch::unique_dim( + samples_values, /*dim=*/0, /*sorted=*/true, /*return_inverse=*/true, /*return_counts=*/false + ); + + auto permutation = torch::arange(samples_inverse.size(0), samples_inverse.options()); + samples_inverse = samples_inverse.flip({0}); + permutation = permutation.flip({0}); + + auto sample_indices = torch::empty(samples_values_unique.size(0), samples_inverse.options()); + sample_indices.scatter_(0, samples_inverse, permutation); + + auto samples = torch::make_intrusive( + 
std::vector{"first_atom", "second_atom", "cell_shift_a", "cell_shift_b", "cell_shift_c"}, + samples_values_unique + ); + + auto neighbor_list = torch::make_intrusive( + interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1).to(dtype).to(device), + samples->to(device), + std::vector{ + metatensor_torch::LabelsHolder::create({"xyz"}, {{0}, {1}, {2}})->to(device), + }, + metatensor_torch::LabelsHolder::create({"distance"}, {{0}})->to(device) + ); + + metatensor_torch::register_autograd_neighbors(system, neighbor_list, options_.check_consistency); + system->add_neighbor_list(cache.options, neighbor_list); + } +} + + +template +metatensor_torch::System MetatensorSystemAdaptorKokkos::system_from_lmp( + bool do_virial, + torch::ScalarType dtype, + torch::Device device +) { + // std::cout << "MetatensorSystemAdaptorKokkos::system_from_lmp" << std::endl; + auto total_n_atoms = atomKK->nlocal + atomKK->nghost; + + auto atom_types_lammps_kokkos = atomKK->k_type.view(); + auto mapping = options_.types_mapping_kokkos; + Kokkos::View atom_types_metatensor_kokkos("atom_types_metatensor", total_n_atoms); /// Can be a class member? (allocation alert) + + Kokkos::parallel_for( + "MetatensorSystemAdaptorKokkos::system_from_lmp::atom_types_mapping", + Kokkos::RangePolicy(0, total_n_atoms), + KOKKOS_LAMBDA(int i) + { + atom_types_metatensor_kokkos(i) = mapping(atom_types_lammps_kokkos(i)); + }); + + atomic_types_ = torch::from_blob( + atom_types_metatensor_kokkos.data(), + {total_n_atoms}, + torch::TensorOptions().dtype(torch::kInt32).device(device) + ).clone(); /// Again, allocation alert. Not sure if this can be avoided + + auto tensor_options = torch::TensorOptions().dtype(torch::kFloat64).device(device); + + // atom->x contains "real" and then ghost atoms, in that order + auto positions_kokkos = atomKK->k_x.view(); + this->positions = torch::from_blob( + positions_kokkos.data(), {total_n_atoms, 3}, + // requires_grad=true since we always need gradients w.r.t. 
positions + tensor_options + ).clone().requires_grad_(true); /// Allocation alert (clone) + + auto cell = torch::zeros({3, 3}, tensor_options); /// Allocation alert, we could make it a class member and allocate it once + /// domain doesn't seem to have a Kokkos version + cell[0][0] = domain->xprd; + + cell[1][0] = domain->xy; + cell[1][1] = domain->yprd; + + cell[2][0] = domain->xz; + cell[2][1] = domain->yz; + cell[2][2] = domain->zprd; + /// And the other elements? Are they always zero? + + auto system_positions = this->positions; + cell = cell.to(dtype).to(device); /// to(device) alert. How do we find the cell on Kokkos? + + if (do_virial) { + auto model_strain = this->strain.to(dtype); /// already on the correct device + + // pretend to scale positions/cell by the strain so that + // it enters the computational graph. + system_positions = system_positions.matmul(model_strain); + cell = cell.matmul(model_strain); + } + + auto system = torch::make_intrusive( + atomic_types_, + system_positions, + cell + ); + + this->setup_neighbors(system); + return system; +} + +namespace LAMMPS_NS { +template class MetatensorNeighborsDataKokkos; +template class MetatensorSystemAdaptorKokkos; +} diff --git a/src/KOKKOS/metatensor_system_kokkos.h b/src/KOKKOS/metatensor_system_kokkos.h new file mode 100644 index 00000000000..3a16384c379 --- /dev/null +++ b/src/KOKKOS/metatensor_system_kokkos.h @@ -0,0 +1,141 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS Development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +#ifndef LMP_METATENSOR_SYSTEM_KOKKOS_H +#define LMP_METATENSOR_SYSTEM_KOKKOS_H + +#include +#include +#include + +#include "pointers.h" +#include "pair.h" +#include "neigh_list.h" +#include "kokkos.h" + +#include + + +namespace LAMMPS_NS { + +template +struct MetatensorSystemOptionsKokkos { + // Mapping from LAMMPS types to metatensor types + const int32_t* types_mapping; + const Kokkos::View types_mapping_kokkos; + // interaction range of the model, in LAMMPS units + double interaction_range; + // should we run extra checks on the neighbor lists? + bool check_consistency; +}; + +// data for metatensor neighbors lists +template +struct MetatensorNeighborsDataKokkos { + // single neighbors sample containing [i, j, S_a, S_b, S_c] + using sample_t = std::array; + + struct SampleHasher { + static void hash_combine(std::size_t& seed, const int32_t& v) { + seed ^= std::hash()(v) + 0x9e3779b9 + (seed<<6) + (seed>>2); + } + + size_t operator()(const sample_t& s) const { + size_t hash = 0; + hash_combine(hash, s[0]); + hash_combine(hash, s[1]); + hash_combine(hash, s[2]); + hash_combine(hash, s[3]); + hash_combine(hash, s[4]); + return hash; + } + }; + + // cutoff for this NL in LAMMPS units + double cutoff; + // options of the NL as requested by the model + metatensor_torch::NeighborListOptions options; + + // Below are cached allocations for the LAMMPS -> metatensor NL translation + // TODO: report memory usage for these? + + // we keep the set of samples twice: once in `known_samples` to remove + // duplicated pairs, and once in `samples` in a format that can be + // used to create a torch::Tensor. 
+ std::unordered_set known_samples; + std::vector samples; + // pairs distances vectors + std::vector> distances_f64; + std::vector> distances_f32; +}; + +template +class MetatensorSystemAdaptorKokkos : public Pointers { +public: + MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Pair* requestor, MetatensorSystemOptionsKokkos options); + MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Compute* requestor, MetatensorSystemOptionsKokkos options); + + ~MetatensorSystemAdaptorKokkos(); + + void init_list(int id, NeighList* ptr); + + + void add_nl_request(double cutoff, metatensor_torch::NeighborListOptions request); + + // Create a metatensor system matching the LAMMPS system data + metatensor_torch::System system_from_lmp(bool do_virial, torch::ScalarType dtype, torch::Device device); + + // Explicit strain for virial calculations. This uses the same dtype/device + // as LAMMPS data (positions, …) + torch::Tensor strain; + // keep the positions as coming from LAMMPS (before any dtype/device + // conversion) to access its gradient + torch::Tensor positions; + +private: + // setup the metatensor neighbors list from the internal LAMMPS one + void setup_neighbors(metatensor_torch::System& system); + + // options for this system adaptor + MetatensorSystemOptionsKokkos options_; + + // LAMMPS NL + NeighList* list_; + // allocations caches for all the NL requested by + // the model + std::vector> caches_; + // allocation cache for the atomic types in the system + torch::Tensor atomic_types_; + // allocation cache holding the "original atom" id for all atoms in the + // system. This is the same as the atom id for all local atoms. For ghost + // atoms, this is either the id of the corresponding local atom if the ghost + // is a periodic image of a local atom, the id of the first ghost we found + // with a given atom tag if the ghost is a periodic image of another ghost; + // or the id of the ghost in all other cases. 
+ std::vector original_atom_id_; + // allocation cache holding the map from atom tag to atom id for local + // atoms. + std::unordered_map local_atoms_tags_; + // allocation cache holding the map from atom tag to atom id for ghost + // atoms. When there are multiple periodic images of the same atom, only one + // will be included here. + std::unordered_map ghost_atoms_tags_; + + // TODO: should we use LAMMPS allocations/deallocation facilities for the + // allocation caches? If we don't, should we report memory usage from the + // allocations caches to LAMMPS one way or another? +}; + +} // namespace LAMMPS_NS + +#endif diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp new file mode 100644 index 00000000000..51f7f92fc11 --- /dev/null +++ b/src/KOKKOS/pair_metatensor_kokkos.cpp @@ -0,0 +1,637 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + LAMMPS Development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing authors: Guillaume Fraux +------------------------------------------------------------------------- */ +#include "pair_metatensor_kokkos.h" + +#include "atom.h" +#include "error.h" +#include "force.h" +#include "memory.h" +#include "neighbor.h" +#include "update.h" +#include "citeme.h" +#include "comm.h" + +#include "neigh_list.h" + +#include "kokkos.h" +#include "atom_kokkos.h" +#include "pair_kokkos.h" +#include "atom_masks.h" + +#include +#include +#include + +#if TORCH_VERSION_MAJOR >= 2 + #include +#endif + +#include + +#include +#include + +#include "metatensor_system_kokkos.h" + +#ifndef KOKKOS_ENABLE_CUDA +namespace Kokkos { +class Cuda {}; +} // namespace Kokkos +#endif // KOKKOS_ENABLE_CUDA + +using namespace LAMMPS_NS; + +struct LAMMPS_NS::PairMetatensorDataKokkos { + PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit); + + void load_model(LAMMPS* lmp, const char* path, const char* extensions_directory); + + // torch model in metatensor format + std::unique_ptr model; + // device to use for the calculations + torch::Device device; + // model capabilities, declared by the model + metatensor_torch::ModelCapabilities capabilities; + // run-time evaluation options, decided by this class + metatensor_torch::ModelEvaluationOptions evaluation_options; + // should metatensor check the data LAMMPS send to the model + // and the data the model returns? 
+ bool check_consistency; + // how far away the model needs to know about neighbors + double interaction_range; + + // allocation cache for the selected atoms + torch::Tensor selected_atoms_values; + // adaptor from LAMMPS system to metatensor's + std::unique_ptr> system_adaptor; +}; + +PairMetatensorDataKokkos::PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit): + system_adaptor(nullptr), + device(torch::kCPU), + check_consistency(false), + interaction_range(-1) +{ + auto options = torch::TensorOptions().dtype(torch::kInt32); + this->selected_atoms_values = torch::zeros({0, 2}, options); + + // default to true for now, this will be changed to false later + this->check_consistency = true; + + // Initialize evaluation_options + this->evaluation_options = torch::make_intrusive(); + this->evaluation_options->set_length_unit(std::move(length_unit)); + + auto output = torch::make_intrusive(); + output->explicit_gradients = {}; + output->set_quantity("energy"); + output->set_unit(std::move(energy_unit)); + output->per_atom = false; + + this->evaluation_options->outputs.insert("energy", output); +} + +void PairMetatensorDataKokkos::load_model( + LAMMPS* lmp, + const char* path, + const char* extensions_directory +) { + // TODO: search for the model & extensions inside `$LAMMPS_POTENTIALS`? 
+ + if (this->model != nullptr) { + lmp->error->all(FLERR, "torch model is already loaded"); + } + + torch::optional extensions = torch::nullopt; + if (extensions_directory != nullptr) { + extensions = std::string(extensions_directory); + } + + try { + this->model = std::make_unique( + metatensor_torch::load_atomistic_model(path, extensions) + ); + } catch (const c10::Error& e) { + lmp->error->all(FLERR, "failed to load metatensor model at '{}': {}", path, e.what()); + } + + auto capabilities_ivalue = this->model->run_method("capabilities"); + this->capabilities = capabilities_ivalue.toCustomClass(); + + if (!this->capabilities->outputs().contains("energy")) { + lmp->error->all(FLERR, "the model at '{}' does not have an \"energy\" output, we can not use it in pair_style metatensor", path); + } + + if (lmp->comm->me == 0) { + auto metadata_ivalue = this->model->run_method("metadata"); + auto metadata = metadata_ivalue.toCustomClass(); + auto to_print = metadata->print(); + + if (lmp->screen) { + fprintf(lmp->screen, "\n%s\n", to_print.c_str()); + } + if (lmp->logfile) { + fprintf(lmp->logfile,"\n%s\n", to_print.c_str()); + } + + // add the model references to LAMMPS citation handling mechanism + for (const auto& it: metadata->references) { + for (const auto& ref: it.value()) { + lmp->citeme->add(ref + "\n"); + } + } + } +} + + +/* ---------------------------------------------------------------------- */ + +template +PairMetatensorKokkos::PairMetatensorKokkos(LAMMPS *lmp): Pair(lmp), type_mapping(nullptr) { + std::string energy_unit; + std::string length_unit; + if (strcmp(update->unit_style, "real") == 0) { + length_unit = "angstrom"; + energy_unit = "kcal/mol"; + } else if (strcmp(update->unit_style, "metal") == 0) { + length_unit = "angstrom"; + energy_unit = "eV"; + } else if (strcmp(update->unit_style, "si") == 0) { + length_unit = "meter"; + energy_unit = "joule"; + } else if (strcmp(update->unit_style, "electron") == 0) { + length_unit = "Bohr"; + energy_unit 
= "Hartree"; + } else { + error->all(FLERR, "unsupported units '{}' for pair metatensor ", update->unit_style); + } + + // we might not be running a pure pair potential, + // so we can not compute virial as fdotr + this->no_virial_fdotr_compute = 1; + + this->mts_data = new PairMetatensorDataKokkos(std::move(length_unit), std::move(energy_unit)); +} + +template +PairMetatensorKokkos::~PairMetatensorKokkos() { + delete this->mts_data; + + if (allocated) { + memory->destroy(setflag); + memory->destroy(cutsq); + memory->destroy(type_mapping); + } +} + +// called when finding `pair_style metatensor` in the input +template +void PairMetatensorKokkos::settings(int argc, char ** argv) { + std::cout << "settings" << std::endl; + + if (argc == 0) { + error->all(FLERR, "expected at least 1 argument to pair_style metatensor, got {}", argc); + } + + const char* model_path = argv[0]; + const char* extensions_directory = nullptr; + const char* requested_device = nullptr; + for (int i=1; iall(FLERR, "expected after 'check_consistency' in pair_style metatensor, got nothing"); + } else if (strcmp(argv[i + 1], "on") == 0) { + mts_data->check_consistency = true; + } else if (strcmp(argv[i + 1], "off") == 0) { + mts_data->check_consistency = false; + } else { + error->all(FLERR, "expected after 'check_consistency' in pair_style metatensor, got '{}'", argv[i + 1]); + } + + i += 1; + } else if (strcmp(argv[i], "extensions") == 0) { + if (i == argc - 1) { + error->all(FLERR, "expected after 'extensions' in pair_style metatensor, got nothing"); + } + extensions_directory = argv[i + 1]; + i += 1; + } else if (strcmp(argv[i], "device") == 0) { + if (i == argc - 1) { + error->all(FLERR, "expected string after 'device' in pair_style metatensor, got nothing"); + } + requested_device = argv[i + 1]; + i += 1; + } else { + error->all(FLERR, "unexpected argument to pair_style metatensor: '{}'", argv[i]); + } + } + + mts_data->load_model(this->lmp, model_path, extensions_directory); + + // Select 
the device to use based on the model's preference, the user choice + // and what's available. + auto available_devices = std::vector(); + for (const auto& device: mts_data->capabilities->supported_devices) { + if (device == "cpu") { + available_devices.push_back(torch::kCPU); + } else if (device == "cuda") { + if (torch::cuda::is_available()) { + // Get a MPI communicator for all processes on the current node + MPI_Comm local; + MPI_Comm_split_type(world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local); + // Get the rank of this MPI process on the current node + int local_rank; + MPI_Comm_rank(local, &local_rank); + + int size; + MPI_Comm_size(local, &size); + if (size < torch::cuda::device_count()) { + if (comm->me == 0) { + error->warning(FLERR, + "found {} CUDA-capable GPUs, but only {} MPI processes on the current node; the remaining GPUs will not be used", + torch::cuda::device_count(), size + ); + } + } + + // split GPUs between node-local processes using round-robin allocation + int gpu_to_use = local_rank % torch::cuda::device_count(); + available_devices.push_back(torch::Device(torch::kCUDA, gpu_to_use)); + } + } else if (device == "mps") { + #if TORCH_VERSION_MAJOR >= 2 + if (torch::mps::is_available()) { + available_devices.push_back(torch::Device("mps")); + } + #endif + } else { + error->warning(FLERR, + "the model declared support for unknown device '{}', it will be ignored", device + ); + } + } + + if (available_devices.empty()) { + error->all(FLERR, + "failed to find a valid device for the model at '{}': " + "the model supports {}, none of these where available", /// typo: where -> were + model_path, torch::str(mts_data->capabilities->supported_devices) + ); + } + + if (requested_device == nullptr) { + // no user request, pick the device the model prefers + mts_data->device = available_devices[0]; + } else { + bool found_requested_device = false; + for (const auto& device: available_devices) { + if (device.is_cpu() && strcmp(requested_device, "cpu") 
== 0) { + mts_data->device = device; + found_requested_device = true; + break; + } else if (device.is_cuda() && strcmp(requested_device, "cuda") == 0) { + mts_data->device = device; + found_requested_device = true; + break; + } else if (device.is_mps() && strcmp(requested_device, "mps") == 0) { + mts_data->device = device; + found_requested_device = true; + break; + } + } + + if (!found_requested_device) { + error->all(FLERR, + "failed to find requested device ({}): it is either " + "not supported by this model or not available on this machine", + requested_device + ); + } + } + + mts_data->model->to(mts_data->device); + + // Handle potential mismatch between Kokkos and model devices + if (std::is_same::value) { + if (!mts_data->device.is_cuda()) { + throw std::runtime_error("Kokkos is running on a GPU, but the model is not on a GPU"); + } + } else { + if (!mts_data->device.is_cpu()) { + throw std::runtime_error("Kokkos is running on the host, but the model is not on CPU"); + } + } + + auto message = "Running simulation on " + mts_data->device.str() + " device with " + mts_data->capabilities->dtype() + " data"; + if (screen) { + fprintf(screen, "%s\n", message.c_str()); + } + if (logfile) { + fprintf(logfile,"%s\n", message.c_str()); + } + + if (!allocated) { + allocate(); + } + + std::cout << "Running on " << typeid(ExecutionSpaceFromDevice::space).name() << std::endl; +} + + +template +void PairMetatensorKokkos::allocate() { + std::cout << "allocate" << std::endl; + + allocated = 1; + + // setflags stores whether the coeff for a given pair of atom types are known + /// I'm tempted to change this one to kokkos but I can't find how it's used + /// Commented out for now + setflag = memory->create( + setflag, + atom->ntypes + 1, + atom->ntypes + 1, + "pair:setflag" + ); + + for (int i = 1; i <= atom->ntypes; i++) { + for (int j = i; j <= atom->ntypes; j++) { + setflag[i][j] = 0; + } + } + + /// I noticed that this cutsq isn't used in the code and is not + /// 
necessary to run it. Commented out for now + + // cutsq stores the squared cutoff for each pair + cutsq = memory->create( + cutsq, + atom->ntypes + 1, + atom->ntypes + 1, + "pair:cutsq" + ); + + // lammps_types_to_species stores the mapping from lammps atom types to + // the metatensor model species + /// This will stay non-kokkos for now (only used at initialization) + type_mapping = memory->create( + type_mapping, + atom->ntypes + 1, + "PairMetatensor:type_mapping" + ); + + for (int i = 1; i <= atom->ntypes; i++) { + type_mapping[i] = -1; + } +} + +template +double PairMetatensorKokkos::init_one(int, int) { + std::cout << "init_one" << std::endl; + return mts_data->interaction_range; +} + + +// called on pair_coeff +template +void PairMetatensorKokkos::coeff(int argc, char ** argv) { + std::cout << "coeff" << std::endl; + if (argc < 3 || strcmp(argv[0], "*") != 0 || strcmp(argv[1], "*") != 0) { + error->all(FLERR, "invalid pair_coeff, expected `pair_coeff * * `"); + } + + if (atom->ntypes != argc - 2) { + error->all(FLERR, + "invalid pair_coeff, expected `pair_coeff * * ` with {} types", + atom->ntypes + ); + } + + for (int lammps_type=1; lammps_typentypes; i++) { + for (int j = 1; j <= atom->ntypes; j++) { + setflag[i][j] = 1; + setflag[j][i] = 1; + } + } +} + + +// called when the run starts +template +void PairMetatensorKokkos::init_style() { + std::cout << "init_style" << std::endl; + // Require newton pair on since we need to communicate forces accumulated on + // ghost atoms to neighboring domains. These forces contributions come from + // gradient of a local descriptor w.r.t. domain ghosts (periodic images + // ghosts are handled separately). 
+ /// Would be good if we could change this because Newton off is the Kokkos default + if (force->newton_pair != 1) { + error->all(FLERR, "Pair style metatensor requires newton pair on"); + } + + // get the model's interaction range + auto range = mts_data->capabilities->engine_interaction_range(mts_data->evaluation_options->length_unit()); + if (range < 0) { + error->all(FLERR, "interaction_range is negative for this model"); + } else if (!std::isfinite(range)) { + error->all(FLERR, "interaction_range is infinite for this model, this is not yet supported"); + } else { + mts_data->interaction_range = range; + } + + /// create Kokkos view for type_mapping + Kokkos::View type_mapping_kokkos("type_mapping", atom->ntypes + 1); + /// copy type_mapping to the Kokkos view (via a host mirror view) + auto type_mapping_kokkos_host = Kokkos::create_mirror_view(type_mapping_kokkos); + for (int i = 0; i < atom->ntypes + 1; i++) { + type_mapping_kokkos_host(i) = type_mapping[i]; + } + Kokkos::deep_copy(type_mapping_kokkos, type_mapping_kokkos_host); + + // create system adaptor + auto options = MetatensorSystemOptionsKokkos{ + this->type_mapping, + type_mapping_kokkos, + mts_data->interaction_range, + mts_data->check_consistency, + }; + mts_data->system_adaptor = std::make_unique>(lmp, this, options); + + // Translate from the metatensor neighbor lists requests to LAMMPS neighbor + // lists requests. 
+ auto requested_nl = mts_data->model->run_method("requested_neighbor_lists"); + for (const auto& ivalue: requested_nl.toList()) { + auto options = ivalue.get().toCustomClass(); + auto cutoff = options->engine_cutoff(mts_data->evaluation_options->length_unit()); + + mts_data->system_adaptor->add_nl_request(cutoff, options); + } +} + + +template +void PairMetatensorKokkos::init_list(int id, NeighList *ptr) { + std::cout << "init_list" << std::endl; + mts_data->system_adaptor->init_list(id, ptr); + std::cout << "init_list done" << std::endl; +} + + +template +void PairMetatensorKokkos::compute(int eflag, int vflag) { + // auto x = atomKK->k_x.view(); + // auto h_array = Kokkos::create_mirror_view(d_array); + // Kokkos::deep_copy(h_array, d_array); + // // Print the values on the host + // for (int i = 0; i < 32; ++i) { + // for (int j = 0; j < 3; ++j) { + // std::cout << h_array(i, j) << " "; + // } + // std::cout << std::endl; + // } + + /// Declare what we need to read from the atomKK object and what we will modify + atomKK->sync(ExecutionSpaceFromDevice::space, X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK); + this->atomKK->modified(ExecutionSpaceFromDevice::space, ENERGY_MASK | F_MASK | VIRIAL_MASK); + + if (eflag || vflag) { + ev_setup(eflag, vflag); + } else { + evflag = vflag_fdotr = eflag_global = eflag_atom = 0; + } + + if (eflag_atom) { + mts_data->evaluation_options->outputs.at("energy")->per_atom = true; + } else { + mts_data->evaluation_options->outputs.at("energy")->per_atom = false; + } + + auto dtype = torch::kFloat64; + if (mts_data->capabilities->dtype() == "float64") { + dtype = torch::kFloat64; + } else if (mts_data->capabilities->dtype() == "float32") { + dtype = torch::kFloat32; + } else { + error->all(FLERR, "the model requested an unsupported dtype '{}'", mts_data->capabilities->dtype()); + } + + // transform from LAMMPS to metatensor System + auto system = mts_data->system_adaptor->system_from_lmp( + 
static_cast(vflag_global), dtype, mts_data->device + ); + + // only run the calculation for atoms actually in the current domain + mts_data->selected_atoms_values.resize_({atom->nlocal, 2}); + for (int i=0; inlocal; i++) { + mts_data->selected_atoms_values[i][0] = 0; + mts_data->selected_atoms_values[i][1] = i; + } + auto selected_atoms = torch::make_intrusive( + std::vector{"system", "atom"}, mts_data->selected_atoms_values + ); + mts_data->evaluation_options->set_selected_atoms(selected_atoms->to(mts_data->device)); + + torch::IValue result_ivalue; + try { + result_ivalue = mts_data->model->forward({ + std::vector{system}, + mts_data->evaluation_options, + mts_data->check_consistency + }); + } catch (const std::exception& e) { + error->all(FLERR, "error evaluating the torch model: {}", e.what()); + } + + auto result = result_ivalue.toGenericDict(); + auto energy = result.at("energy").toCustomClass(); + auto energy_tensor = metatensor_torch::TensorMapHolder::block_by_id(energy, 0)->values(); + auto energy_detached = energy_tensor.detach().to(torch::kCPU).to(torch::kFloat64); + + // store the energy returned by the model + torch::Tensor global_energy; + if (eflag_atom) { + auto energies = energy_detached.accessor(); + for (int i=0; inlocal + atom->nghost; i++) { + // TODO: handle out of order samples + eatom[i] += energies[i][0]; + } + + global_energy = energy_detached.sum(0); + assert(energy_detached.sizes() == std::vector({1})); + } else { + assert(energy_detached.sizes() == std::vector({1, 1})); + global_energy = energy_detached.reshape({1}); + } + + if (eflag_global) { + eng_vdwl += global_energy.item(); + } + + // reset gradients to zero before calling backward + mts_data->system_adaptor->positions.mutable_grad() = torch::Tensor(); + mts_data->system_adaptor->strain.mutable_grad() = torch::Tensor(); + + // compute forces/virial with backward propagation + energy_tensor.backward(-torch::ones_like(energy_tensor)); + auto forces_tensor = 
mts_data->system_adaptor->positions.grad(); + assert(forces_tensor.scalar_type() == torch::kFloat64); + + auto forces_lammps_kokkos = this->atomKK->k_f. template view(); + /// Is it possible to do double*[3] here? + auto forces_metatensor_kokkos = Kokkos::View>(forces_tensor.contiguous().data_ptr(), atom->nlocal + atom->nghost, 3); + + Kokkos::parallel_for("PairMetatensorKokkos::compute::force_accumulation", atom->nlocal + atom->nghost, KOKKOS_LAMBDA(const int i) { + forces_lammps_kokkos(i, 0) += forces_metatensor_kokkos(i, 0); + forces_lammps_kokkos(i, 1) += forces_metatensor_kokkos(i, 1); + forces_lammps_kokkos(i, 2) += forces_metatensor_kokkos(i, 2); + }); + + assert(!vflag_fdotr); + + if (vflag_global) { + auto virial_tensor = mts_data->system_adaptor->strain.grad(); + assert(virial_tensor.scalar_type() == torch::kFloat64); + + // apparently the cell is not supported in Kokkos format, + // so it has to be updated on CPU (??) + auto predicted_virial_tensor_cpu = virial_tensor.cpu(); + auto predicted_virial = predicted_virial_tensor_cpu.accessor(); + + virial[0] += predicted_virial[0][0]; + virial[1] += predicted_virial[1][1]; + virial[2] += predicted_virial[2][2]; + + virial[3] += 0.5 * (predicted_virial[1][0] + predicted_virial[0][1]); + virial[4] += 0.5 * (predicted_virial[2][0] + predicted_virial[0][2]); + virial[5] += 0.5 * (predicted_virial[2][1] + predicted_virial[1][2]); + } + + if (vflag_atom) { + error->all(FLERR, "per atom virial is not implemented"); + } +} + +namespace LAMMPS_NS { +template class PairMetatensorKokkos; +/// TODO: Host version +} diff --git a/src/KOKKOS/pair_metatensor_kokkos.h b/src/KOKKOS/pair_metatensor_kokkos.h new file mode 100644 index 00000000000..8f5f144cec5 --- /dev/null +++ b/src/KOKKOS/pair_metatensor_kokkos.h @@ -0,0 +1,59 @@ +/* -*- c++ -*- ---------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + https://www.lammps.org/, Sandia National Laboratories + 
LAMMPS Development team: developers@lammps.org + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ +#ifdef PAIR_CLASS +// clang-format off +PairStyle(metatensor/kk, PairMetatensorKokkos); +// clang-format on +#else + +#ifndef LMP_PAIR_METATENSOR_KOKKOS_H +#define LMP_PAIR_METATENSOR_KOKKOS_H + +#include "kokkos_base.h" +#include "pair_kokkos.h" + +namespace LAMMPS_NS { + +template +class MetatensorSystemAdaptorKokkos; + +struct PairMetatensorDataKokkos; + +/// I noticed that most other kokkos packages inherit from their non-kokkos +/// counterparts. It doesn't look like a good idea to me because +/// they end up overriding everything... Not doing it here for now. 
+template +class PairMetatensorKokkos : public Pair, public KokkosBase { +public: + PairMetatensorKokkos(class LAMMPS *); + ~PairMetatensorKokkos(); + + void compute(int, int) override; + void settings(int, char **) override; + void coeff(int, char **) override; + void init_style() override; + double init_one(int, int) override; + void init_list(int id, NeighList *ptr) override; + + void allocate(); +private: + PairMetatensorDataKokkos* mts_data; + + // mapping from LAMMPS types to metatensor types + int32_t* type_mapping; +}; + +} // namespace LAMMPS_NS + +#endif +#endif diff --git a/src/ML-METATENSOR/pair_metatensor.cpp b/src/ML-METATENSOR/pair_metatensor.cpp index 2f606b45525..6777afc5cb6 100644 --- a/src/ML-METATENSOR/pair_metatensor.cpp +++ b/src/ML-METATENSOR/pair_metatensor.cpp @@ -518,7 +518,7 @@ void PairMetatensor::compute(int eflag, int vflag) { auto samples_values = energy_samples->values().to(torch::kCPU); auto samples = samples_values.accessor(); - int64_t n_atoms = atom->nlocal + atom->nghost; + // int64_t n_atoms = atom->nlocal + atom->nghost; assert(samples_values.sizes() == mts_data->selected_atoms_values.sizes()); auto energies = energy_detached.accessor(); From f4fe5ad2604fd407dcfd9077fdad775d28ab5275 Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Tue, 16 Jul 2024 12:59:05 +0200 Subject: [PATCH 02/15] Small GPU fixes --- examples/PACKAGES/metatensor/in.metatensor | 4 ++-- src/KOKKOS/metatensor_system_kokkos.cpp | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/PACKAGES/metatensor/in.metatensor b/examples/PACKAGES/metatensor/in.metatensor index 59a32c89e4a..708f852f88c 100644 --- a/examples/PACKAGES/metatensor/in.metatensor +++ b/examples/PACKAGES/metatensor/in.metatensor @@ -17,9 +17,9 @@ pair_style metatensor nickel-lj.pt pair_coeff * * 28 timestep 0.001 -fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0 +fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(100 * dt) -thermo 10 +thermo 1 
thermo_style custom step temp pe etotal press vol # dump 1 all atom 10 dump.metatensor diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index e4fc076e04f..91e37324d88 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -211,13 +211,15 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors(metatensor_to auto centers_tensor = torch::from_blob( centers.data(), {total_number_of_pairs}, - torch::TensorOptions().dtype(torch::kInt32).device(device) + torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU) ); + centers_tensor = centers_tensor.to(device); auto neighbors_tensor = torch::from_blob( neighbors.data(), {total_number_of_pairs}, - torch::TensorOptions().dtype(torch::kInt32).device(device) + torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU) ); + neighbors_tensor = neighbors_tensor.to(device); // change centers and neighbors to the original atom ids auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor); From 1699bfedf1468d2c0a091f3999cd1de6a61cbbd4 Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Mon, 21 Oct 2024 17:14:28 +0200 Subject: [PATCH 03/15] Profile and fix speed issues --- examples/PACKAGES/metatensor/in.kokkos.metatensor | 10 +++++----- examples/PACKAGES/metatensor/in.metatensor | 6 +++--- examples/PACKAGES/metatensor/readme.txt | 6 +++--- src/KOKKOS/metatensor_system_kokkos.cpp | 8 +++++--- src/KOKKOS/pair_metatensor_kokkos.cpp | 3 ++- 5 files changed, 18 insertions(+), 15 deletions(-) diff --git a/examples/PACKAGES/metatensor/in.kokkos.metatensor b/examples/PACKAGES/metatensor/in.kokkos.metatensor index 39c8ae38cd2..39a1cf644b0 100644 --- a/examples/PACKAGES/metatensor/in.kokkos.metatensor +++ b/examples/PACKAGES/metatensor/in.kokkos.metatensor @@ -3,7 +3,7 @@ boundary p p p atom_style atomic/kk lattice fcc 3.6 -region box block 0 2 0 2 0 2 +region box block 0 8 0 8 0 8 create_box 1 box create_atoms 
1 box @@ -14,15 +14,15 @@ velocity all create 123 42 run_style verlet/kk -pair_style metatensor/kk nickel-lj.pt device cuda +pair_style metatensor/kk nickel-lj.pt device cuda check_consistency off pair_coeff * * 28 timestep 0.001 fix 1 all nve -thermo 1 -thermo_style custom step temp pe etotal press vol +thermo 100 +thermo_style custom step temp pe etotal press vol cpu # dump 1 all atom 10 dump.metatensor -run 100 +run 1000 diff --git a/examples/PACKAGES/metatensor/in.metatensor b/examples/PACKAGES/metatensor/in.metatensor index b2e971c188d..9b93563a5c9 100644 --- a/examples/PACKAGES/metatensor/in.metatensor +++ b/examples/PACKAGES/metatensor/in.metatensor @@ -19,9 +19,9 @@ pair_coeff * * 28 timestep 0.001 fix 1 all nve -thermo 1 -thermo_style custom step temp pe etotal press vol +thermo 100 +thermo_style custom step temp pe etotal press vol cpu # dump 1 all atom 10 dump.metatensor -run 100 +run 1000 diff --git a/examples/PACKAGES/metatensor/readme.txt b/examples/PACKAGES/metatensor/readme.txt index e853f85d828..a09a5131448 100644 --- a/examples/PACKAGES/metatensor/readme.txt +++ b/examples/PACKAGES/metatensor/readme.txt @@ -5,10 +5,10 @@ To be compiled as cmake ../cmake/ -DPKG_ML-METATENSOR=ON -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON Run the example with -../../../build/lmp -k on g 1 -pk kokkos newton on -in in.metatensor_kokkos +../../../build/lmp -k on g 1 -pk kokkos newton on -in in.kokkos.metatensor and compare its output with the non-kokkos interface ../../../build/lmp -in in.metatensor -cmake ../cmake -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/home/filippo/code/virtualenvs/base/lib/python3.12/site-packages/torch/share/cmake/ -cmake ../cmake/ -DPKG_ML-METATENSOR=ON -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DCMAKE_PREFIX_PATH=/home/filippo/code/virtualenvs/base/lib/python3.12/site-packages/torch/share/cmake/ +cmake ../cmake -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../site-packages/torch/share/cmake/ +cmake 
../cmake/ -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../libtorch/share/cmake/ diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index 254d93ae5ee..e5dc729de45 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -27,6 +27,9 @@ #include "kokkos.h" #include "atom_kokkos.h" +// #include +// #include + #ifndef KOKKOS_ENABLE_CUDA namespace Kokkos { class Cuda {}; @@ -134,8 +137,7 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten auto cell_inv_tensor = system->cell().inverse().t().to(device).to(torch::kFloat64); // it might be a good idea to have this as float32 if the model is using float32 // to speed up the computation, especially on GPU - - + /*-------------- whatever, this will be done on CPU for now ------------------------*/ // Collect the local atom id of all local & ghosts atoms, mapping ghosts @@ -312,6 +314,7 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten metatensor_torch::register_autograd_neighbors(system, neighbor_list, options_.check_consistency); system->add_neighbor_list(cache.options, neighbor_list); } + } @@ -515,7 +518,6 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos::system_fr torch::ScalarType dtype, torch::Device device ) { - // std::cout << "MetatensorSystemAdaptorKokkos::system_from_lmp" << std::endl; auto total_n_atoms = atomKK->nlocal + atomKK->nghost; auto atom_types_lammps_kokkos = atomKK->k_type.view(); diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp index 18cd91376ed..12322410764 100644 --- a/src/KOKKOS/pair_metatensor_kokkos.cpp +++ b/src/KOKKOS/pair_metatensor_kokkos.cpp @@ -549,7 +549,7 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { // } /// Declare what we need to read from the atomKK object and what we will modify - atomKK->sync(ExecutionSpaceFromDevice::space, X_MASK 
| F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK); + this->atomKK->sync(ExecutionSpaceFromDevice::space, X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK); this->atomKK->modified(ExecutionSpaceFromDevice::space, ENERGY_MASK | F_MASK | VIRIAL_MASK); if (eflag || vflag) { @@ -579,6 +579,7 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { ); // only run the calculation for atoms actually in the current domain + // TODO: port to Kokkos mts_data->selected_atoms_values.resize_({atom->nlocal, 2}); for (int i=0; inlocal; i++) { mts_data->selected_atoms_values[i][0] = 0; From 8836c228ce0262a362061e11c2adbfb5c40145bb Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Mon, 21 Oct 2024 21:17:24 +0200 Subject: [PATCH 04/15] Fix dtypes --- src/KOKKOS/metatensor_system_kokkos.cpp | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index e5dc729de45..963ea7d5aeb 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -127,14 +127,13 @@ void MetatensorSystemAdaptorKokkos::add_nl_request(double cutoff, template void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metatensor_torch::System& system) { - // std::cout << "MetatensorSystemAdaptorKokkos::setup_neighbors" << std::endl; auto dtype = system->positions().scalar_type(); auto device = system->positions().device(); auto positions_kokkos = this->atomKK->k_x. 
template view(); auto total_n_atoms = atomKK->nlocal + atomKK->nghost; - auto cell_inv_tensor = system->cell().inverse().t().to(device).to(torch::kFloat64); + auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype); // it might be a good idea to have this as float32 if the model is using float32 // to speed up the computation, especially on GPU @@ -232,7 +231,7 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten positions_kokkos.data(), {total_n_atoms, 3}, torch::TensorOptions().dtype(torch::kFloat64).device(device) - ); + ).to(dtype); for (auto& cache: caches_) { // half list mask, if necessary (TODO: change names! This could modify the tensors outside the loop if more than one NL!) @@ -320,14 +319,13 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten template void MetatensorSystemAdaptorKokkos::setup_neighbors_no_remap(metatensor_torch::System& system) { - // std::cout << "MetatensorSystemAdaptorKokkos::setup_neighbors" << std::endl; auto dtype = system->positions().scalar_type(); auto device = system->positions().device(); auto positions_kokkos = this->atomKK->k_x. template view(); auto total_n_atoms = atomKK->nlocal + atomKK->nghost; - auto cell_inv_tensor = system->cell().inverse().t().to(device).to(torch::kFloat64); + auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype); // it might be a good idea to have this as float32 if the model is using float32 // to speed up the computation, especially on GPU @@ -426,7 +424,7 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_no_remap(meta positions_kokkos.data(), {total_n_atoms, 3}, torch::TensorOptions().dtype(torch::kFloat64).device(device) - ); + ).to(dtype); for (auto& cache: caches_) { // half list mask, if necessary (TODO: change names! This could modify the tensors outside the loop if more than one NL!) 
@@ -538,17 +536,17 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos::system_fr torch::TensorOptions().dtype(torch::kInt32).device(device) ).clone(); /// Again, allocation alert. Not sure if this can be avoided - auto tensor_options = torch::TensorOptions().dtype(torch::kFloat64).device(device); - // atom->x contains "real" and then ghost atoms, in that order auto positions_kokkos = atomKK->k_x.view(); + auto tensor_options_positions = torch::TensorOptions().dtype(torch::kFloat64).device(device); this->positions = torch::from_blob( positions_kokkos.data(), {total_n_atoms, 3}, // requires_grad=true since we always need gradients w.r.t. positions - tensor_options + tensor_options_positions ).clone().requires_grad_(true); /// Allocation alert (clone) - auto cell = torch::zeros({3, 3}, tensor_options); /// Allocation alert, we could make it a class member and allocate it once + auto tensor_options_cell = torch::TensorOptions().dtype(dtype).device(device); + auto cell = torch::zeros({3, 3}, tensor_options_cell); /// Allocation alert, we could make it a class member and allocate it once /// domain doesn't seem to have a Kokkos version cell[0][0] = domain->xprd; @@ -560,7 +558,7 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos::system_fr cell[2][2] = domain->zprd; /// And the other elements? Are they always zero? - auto system_positions = this->positions; + auto system_positions = this->positions.to(dtype); cell = cell.to(dtype).to(device); /// to(device) alert. How do we find the cell on Kokkos? 
if (do_virial) { From ecf5b4af9461dfd65914598d94772e34130c6572 Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Tue, 22 Oct 2024 15:31:46 +0200 Subject: [PATCH 05/15] Use the Kokkos NL directly --- src/KOKKOS/metatensor_system_kokkos.cpp | 110 ++++++++++++++++-------- src/KOKKOS/metatensor_system_kokkos.h | 6 +- src/KOKKOS/pair_metatensor_kokkos.cpp | 59 ++++++++----- 3 files changed, 118 insertions(+), 57 deletions(-) diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index 963ea7d5aeb..0a42a6eb017 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -27,8 +27,8 @@ #include "kokkos.h" #include "atom_kokkos.h" -// #include -// #include +#include +#include #ifndef KOKKOS_ENABLE_CUDA namespace Kokkos { @@ -61,6 +61,8 @@ MetatensorSystemAdaptorKokkos::MetatensorSystemAdaptorKokkos(LAMM auto request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); request->set_id(0); request->set_cutoff(options_.interaction_range); + request->set_kokkos_host(0); + request->set_kokkos_device(1); this->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(device).requires_grad(true)); } @@ -127,6 +129,12 @@ void MetatensorSystemAdaptorKokkos::add_nl_request(double cutoff, template void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metatensor_torch::System& system) { + // auto start = std::chrono::high_resolution_clock::now(); + // auto end = std::chrono::high_resolution_clock::now(); + + // torch::cuda::synchronize(); + // start = std::chrono::high_resolution_clock::now(); + auto dtype = system->positions().scalar_type(); auto device = system->positions().device(); @@ -134,8 +142,6 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten auto total_n_atoms = atomKK->nlocal + atomKK->nghost; auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype); - // it might be a good idea to have this as float32 if the 
model is using float32 - // to speed up the computation, especially on GPU /*-------------- whatever, this will be done on CPU for now ------------------------*/ @@ -188,39 +194,64 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten ); original_atom_id_tensor = original_atom_id_tensor.to(device); // RIP - // Accumulate total number of pairs - int total_number_of_pairs = 0; - for (int ii=0; ii<(list_->inum + list_->gnum); ii++) { - total_number_of_pairs += list_->numneigh[ii]; - } - std::vector centers(total_number_of_pairs); - std::vector neighbors(total_number_of_pairs); + // torch::cuda::synchronize(); + // end = std::chrono::high_resolution_clock::now(); + // std::cout << " CPU packaging and GPU transfer (1st part): " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; - // Fill the centers and neighbors arrays with the original atom ids - int pair_index = 0; - for (int ii=0; ii<(list_->inum + list_->gnum); ii++) { - auto atom_i = list_->ilist[ii]; - auto neighbors_ii = list_->firstneigh[ii]; - for (int jj=0; jjnumneigh[ii]; jj++) { - centers[pair_index] = atom_i; - neighbors[pair_index] = neighbors_ii[jj] & NEIGHMASK; - pair_index++; - } - } + // torch::cuda::synchronize(); + // start = std::chrono::high_resolution_clock::now(); - // Create torch tensors for the centers and neighbors arrays - auto centers_tensor = torch::from_blob( - centers.data(), - {total_number_of_pairs}, - torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU) + + NeighListKokkos* list_kk = static_cast*>(this->list_); + + auto numneigh_kk = list_kk->d_numneigh; + auto neighbors_kk = list_kk->d_neighbors; + auto ilist_kk = list_kk->d_ilist; + + auto max_number_of_neighbors = list_kk->maxneighs; + + // mask neighbors_kk with NEIGHMASK. 
We take this opportunity to set the + // layout of this view to LayoutRight, which we need to feed the pointer to torch + Kokkos::View neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors); + Kokkos::parallel_for("mask_neigh", total_n_atoms*max_number_of_neighbors, KOKKOS_LAMBDA(int i) { + auto local_i = i / max_number_of_neighbors; + auto local_j = i % max_number_of_neighbors; + neighbors_kk_masked(local_i, local_j) = neighbors_kk(local_i, local_j) & NEIGHMASK; + }); + + auto numneigh_torch = torch::from_blob( + numneigh_kk.data(), + {total_n_atoms}, + torch::TensorOptions().dtype(torch::kInt32).device(device) ); - centers_tensor = centers_tensor.to(device); - auto neighbors_tensor = torch::from_blob( - neighbors.data(), - {total_number_of_pairs}, - torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU) + + auto neighbors_torch = torch::from_blob( + neighbors_kk_masked.data(), + {total_n_atoms, max_number_of_neighbors}, + torch::TensorOptions().dtype(torch::kInt32).device(device) ); - neighbors_tensor = neighbors_tensor.to(device); + + auto ilist_torch = torch::from_blob( + ilist_kk.data(), + {total_n_atoms}, + torch::TensorOptions().dtype(torch::kInt32).device(device) + ); + + auto expanded_arange = torch::arange(max_number_of_neighbors, torch::TensorOptions().dtype(torch::kInt32).device(device)).unsqueeze(0).expand({total_n_atoms, -1}); + auto neighbor_2d_mask = expanded_arange < numneigh_torch.unsqueeze(1); + + auto expanded_arange_other_dim = torch::arange(total_n_atoms, torch::TensorOptions().dtype(torch::kInt32).device(device)).unsqueeze(1).expand({-1, max_number_of_neighbors}); + auto index_for_ilist = expanded_arange_other_dim.masked_select(neighbor_2d_mask); + auto centers_tensor = ilist_torch.index_select(0, index_for_ilist); + + auto neighbors_tensor = neighbors_torch.masked_select(neighbor_2d_mask); + + // torch::cuda::synchronize(); + // end = std::chrono::high_resolution_clock::now(); + // std::cout << " CPU 
packaging and GPU transfer (2nd part): " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; + + // torch::cuda::synchronize(); + // start = std::chrono::high_resolution_clock::now(); // change centers and neighbors to the original atom ids auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor); @@ -296,13 +327,24 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten auto sample_indices = torch::empty(samples_values_unique.size(0), samples_inverse.options()); sample_indices.scatter_(0, samples_inverse, permutation); + // torch::cuda::synchronize(); + // end = std::chrono::high_resolution_clock::now(); + // std::cout << " filtering out stuff: " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; + + // torch::cuda::synchronize(); + // start = std::chrono::high_resolution_clock::now(); + auto samples = torch::make_intrusive( std::vector{"first_atom", "second_atom", "cell_shift_a", "cell_shift_b", "cell_shift_c"}, samples_values_unique ); + // torch::cuda::synchronize(); + // end = std::chrono::high_resolution_clock::now(); + // std::cout << " Time to create big labels: " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; + auto neighbor_list = torch::make_intrusive( - interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1).to(dtype).to(device), + interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1), samples->to(device), std::vector{ metatensor_torch::LabelsHolder::create({"xyz"}, {{0}, {1}, {2}})->to(device), diff --git a/src/KOKKOS/metatensor_system_kokkos.h b/src/KOKKOS/metatensor_system_kokkos.h index c1c661edb60..fbd788235af 100644 --- a/src/KOKKOS/metatensor_system_kokkos.h +++ b/src/KOKKOS/metatensor_system_kokkos.h @@ -107,7 +107,10 @@ class MetatensorSystemAdaptorKokkos : public Pointers { // conversion) to access its gradient torch::Tensor positions; -private: + + // These two 
are not private otherwise Kokkos can't see the lambdas + // defined inside them + // setup the metatensor neighbors list from the internal LAMMPS one, // remapping periodic ghosts to the corresponding local atom void setup_neighbors_remap(metatensor_torch::System& system); @@ -118,6 +121,7 @@ class MetatensorSystemAdaptorKokkos : public Pointers { // This produces a larger NL but skips the cost of the remapping void setup_neighbors_no_remap(metatensor_torch::System& system); +private: // options for this system adaptor MetatensorSystemOptionsKokkos options_; diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp index 12322410764..bb055f11cf8 100644 --- a/src/KOKKOS/pair_metatensor_kokkos.cpp +++ b/src/KOKKOS/pair_metatensor_kokkos.cpp @@ -47,6 +47,8 @@ #include "metatensor_system_kokkos.h" +#include + #ifndef KOKKOS_ENABLE_CUDA namespace Kokkos { class Cuda {}; @@ -77,8 +79,6 @@ struct LAMMPS_NS::PairMetatensorDataKokkos { // how far away the model needs to know about neighbors double max_cutoff; - // allocation cache for the selected atoms - torch::Tensor selected_atoms_values; // adaptor from LAMMPS system to metatensor's std::unique_ptr> system_adaptor; }; @@ -90,9 +90,6 @@ PairMetatensorDataKokkos::PairMetatensorDataKokkos(std::string length_unit, std: remap_pairs(true), max_cutoff(-1) { - auto options = torch::TensorOptions().dtype(torch::kInt32); - this->selected_atoms_values = torch::zeros({0, 2}, options); - // default to true for now, this will be changed to false later this->check_consistency = true; @@ -205,8 +202,6 @@ PairMetatensorKokkos::~PairMetatensorKokkos() { // called when finding `pair_style metatensor` in the input template void PairMetatensorKokkos::settings(int argc, char ** argv) { - std::cout << "settings" << std::endl; - if (argc == 0) { error->all(FLERR, "expected at least 1 argument to pair_style metatensor, got {}", argc); } @@ -370,8 +365,6 @@ void PairMetatensorKokkos::settings(int argc, char ** 
argv) { template void PairMetatensorKokkos::allocate() { - std::cout << "allocate" << std::endl; - allocated = 1; // setflags stores whether the coeff for a given pair of atom types are known @@ -417,7 +410,6 @@ void PairMetatensorKokkos::allocate() { template double PairMetatensorKokkos::init_one(int, int) { - std::cout << "init_one" << std::endl; return mts_data->max_cutoff; } @@ -425,7 +417,6 @@ double PairMetatensorKokkos::init_one(int, int) { // called on pair_coeff template void PairMetatensorKokkos::coeff(int argc, char ** argv) { - std::cout << "coeff" << std::endl; if (argc < 3 || strcmp(argv[0], "*") != 0 || strcmp(argv[1], "*") != 0) { error->all(FLERR, "invalid pair_coeff, expected `pair_coeff * * `"); } @@ -455,7 +446,6 @@ void PairMetatensorKokkos::coeff(int argc, char ** argv) { // called when the run starts template void PairMetatensorKokkos::init_style() { - std::cout << "init_style" << std::endl; // Require newton pair on since we need to communicate forces accumulated on // ghost atoms to neighboring domains. These forces contributions come from // gradient of a local descriptor w.r.t. 
domain ghosts (periodic images @@ -529,14 +519,15 @@ void PairMetatensorKokkos::init_style() { template void PairMetatensorKokkos::init_list(int id, NeighList *ptr) { - std::cout << "init_list" << std::endl; mts_data->system_adaptor->init_list(id, ptr); - std::cout << "init_list done" << std::endl; } template void PairMetatensorKokkos::compute(int eflag, int vflag) { + // auto start = std::chrono::high_resolution_clock::now(); + // auto end = std::chrono::high_resolution_clock::now(); + // auto x = atomKK->k_x.view(); // auto h_array = Kokkos::create_mirror_view(d_array); // Kokkos::deep_copy(h_array, d_array); @@ -573,24 +564,35 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { error->all(FLERR, "the model requested an unsupported dtype '{}'", mts_data->capabilities->dtype()); } + // torch::cuda::synchronize(); + // start = std::chrono::high_resolution_clock::now(); + // transform from LAMMPS to metatensor System auto system = mts_data->system_adaptor->system_from_lmp( static_cast(vflag_global), mts_data->remap_pairs, dtype, mts_data->device ); + // torch::cuda::synchronize(); + // end = std::chrono::high_resolution_clock::now(); + // std::cout << "sys-from-lmp: " << std::chrono::duration_cast(end - start).count() / 1000.0 << " ms" << std::endl; + // only run the calculation for atoms actually in the current domain - // TODO: port to Kokkos - mts_data->selected_atoms_values.resize_({atom->nlocal, 2}); - for (int i=0; inlocal; i++) { - mts_data->selected_atoms_values[i][0] = 0; - mts_data->selected_atoms_values[i][1] = i; - } + auto tensor_options = torch::TensorOptions().dtype(torch::kInt32).device(mts_data->device); + torch::Tensor selected_atoms_values = torch::stack({ + torch::zeros({atom->nlocal}, tensor_options), + torch::arange(atom->nlocal, tensor_options) + }, -1); + auto selected_atoms = torch::make_intrusive( - std::vector{"system", "atom"}, mts_data->selected_atoms_values + std::vector{"system", "atom"}, selected_atoms_values ); - 
mts_data->evaluation_options->set_selected_atoms(selected_atoms->to(mts_data->device)); + mts_data->evaluation_options->set_selected_atoms(selected_atoms); torch::IValue result_ivalue; + + // torch::cuda::synchronize(); + // start = std::chrono::high_resolution_clock::now(); + try { result_ivalue = mts_data->model->forward({ std::vector{system}, @@ -601,6 +603,10 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { error->all(FLERR, "error evaluating the torch model: {}", e.what()); } + // torch::cuda::synchronize(); + // end = std::chrono::high_resolution_clock::now(); + // std::cout << "Time taken forward: " << std::chrono::duration_cast(end - start).count() / 1000.0 << " ms" << std::endl; + auto result = result_ivalue.toGenericDict(); auto energy = result.at("energy").toCustomClass(); auto energy_tensor = metatensor_torch::TensorMapHolder::block_by_id(energy, 0)->values(); @@ -631,7 +637,16 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { mts_data->system_adaptor->strain.mutable_grad() = torch::Tensor(); // compute forces/virial with backward propagation + + // torch::cuda::synchronize(); + // start = std::chrono::high_resolution_clock::now(); + energy_tensor.backward(-torch::ones_like(energy_tensor)); + + // torch::cuda::synchronize(); + // end = std::chrono::high_resolution_clock::now(); + // std::cout << "Time taken backward: " << std::chrono::duration_cast(end - start).count() / 1000.0 << " ms" << std::endl; + auto forces_tensor = mts_data->system_adaptor->positions.grad(); assert(forces_tensor.scalar_type() == torch::kFloat64); From ea9a29fb1b75baecde5f2158c5326c92681a276c Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Wed, 23 Oct 2024 09:49:13 +0200 Subject: [PATCH 06/15] Use transposed kokkos NL for a better memory layout --- src/KOKKOS/metatensor_system_kokkos.cpp | 25 +++++++++++++------------ src/KOKKOS/pair_metatensor_kokkos.cpp | 3 +++ 2 files changed, 16 insertions(+), 12 deletions(-) diff --git 
a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index 0a42a6eb017..e96cbda17c8 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -132,18 +132,20 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten // auto start = std::chrono::high_resolution_clock::now(); // auto end = std::chrono::high_resolution_clock::now(); - // torch::cuda::synchronize(); - // start = std::chrono::high_resolution_clock::now(); - auto dtype = system->positions().scalar_type(); auto device = system->positions().device(); auto positions_kokkos = this->atomKK->k_x. template view(); auto total_n_atoms = atomKK->nlocal + atomKK->nghost; - auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype); + // torch::cuda::synchronize(); + // start = std::chrono::high_resolution_clock::now(); /*-------------- whatever, this will be done on CPU for now ------------------------*/ + // The cost of this section seems to be very low + + // There is no kokkos cell in LAMMPS, so we need to transfer + auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype); // Collect the local atom id of all local & ghosts atoms, mapping ghosts // atoms which are periodic images of local atoms back to the local atoms. 
@@ -185,7 +187,6 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten } } } - /*----------- end of whatever, this will be done on CPU for now --------------*/ auto original_atom_id_tensor = torch::from_blob( original_atom_id_.data(), @@ -196,22 +197,22 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten // torch::cuda::synchronize(); // end = std::chrono::high_resolution_clock::now(); - // std::cout << " CPU packaging and GPU transfer (1st part): " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; + // std::cout << " ghost mapping (CPU): " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; + + /*----------- end of whatever, this will be done on CPU for now --------------*/ // torch::cuda::synchronize(); // start = std::chrono::high_resolution_clock::now(); - NeighListKokkos* list_kk = static_cast*>(this->list_); auto numneigh_kk = list_kk->d_numneigh; - auto neighbors_kk = list_kk->d_neighbors; + auto neighbors_kk = list_kk->d_neighbors_transpose; auto ilist_kk = list_kk->d_ilist; auto max_number_of_neighbors = list_kk->maxneighs; - // mask neighbors_kk with NEIGHMASK. 
We take this opportunity to set the - // layout of this view to LayoutRight, which we need to feed the pointer to torch + // mask neighbors_kk with NEIGHMASK Kokkos::View neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors); Kokkos::parallel_for("mask_neigh", total_n_atoms*max_number_of_neighbors, KOKKOS_LAMBDA(int i) { auto local_i = i / max_number_of_neighbors; @@ -248,7 +249,7 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten // torch::cuda::synchronize(); // end = std::chrono::high_resolution_clock::now(); - // std::cout << " CPU packaging and GPU transfer (2nd part): " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; + // std::cout << " NL format conversion: " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; // torch::cuda::synchronize(); // start = std::chrono::high_resolution_clock::now(); @@ -329,7 +330,7 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten // torch::cuda::synchronize(); // end = std::chrono::high_resolution_clock::now(); - // std::cout << " filtering out stuff: " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; + // std::cout << " NL filtering: " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; // torch::cuda::synchronize(); // start = std::chrono::high_resolution_clock::now(); diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp index bb055f11cf8..5cabe04f376 100644 --- a/src/KOKKOS/pair_metatensor_kokkos.cpp +++ b/src/KOKKOS/pair_metatensor_kokkos.cpp @@ -359,6 +359,9 @@ void PairMetatensorKokkos::settings(int argc, char ** argv) { allocate(); } + // this will allow us to receive the NL in a GPU-friendly format + this->lmp->kokkos->neigh_transpose = 1; + std::cout << "Running on " << typeid(ExecutionSpaceFromDevice::space).name() << std::endl; } From f0352aedcbe68139a6efbfe5aec871d56b360cdc Mon Sep 17 
00:00:00 2001 From: frostedoyster Date: Thu, 24 Oct 2024 13:45:22 +0200 Subject: [PATCH 07/15] Clean up --- doc/src/pair_metatensor.rst | 2 + examples/PACKAGES/metatensor/readme.txt | 20 +- src/KOKKOS/metatensor_system_kokkos.cpp | 391 ++++++------------------ src/KOKKOS/metatensor_system_kokkos.h | 16 +- src/KOKKOS/pair_metatensor_kokkos.cpp | 133 ++++---- src/KOKKOS/pair_metatensor_kokkos.h | 10 +- 6 files changed, 161 insertions(+), 411 deletions(-) diff --git a/doc/src/pair_metatensor.rst b/doc/src/pair_metatensor.rst index aad21c0e84d..fa89291796c 100644 --- a/doc/src/pair_metatensor.rst +++ b/doc/src/pair_metatensor.rst @@ -3,6 +3,8 @@ pair_style metatensor command ============================= +Accelerator Variants: *metatensor/kk* + Syntax """""" diff --git a/examples/PACKAGES/metatensor/readme.txt b/examples/PACKAGES/metatensor/readme.txt index a09a5131448..c3c3df36ff5 100644 --- a/examples/PACKAGES/metatensor/readme.txt +++ b/examples/PACKAGES/metatensor/readme.txt @@ -1,14 +1,14 @@ -Design taken from pace_kokkos with accessory files in their own directory. -Will probably need some cmake magic to copy them here from somewhere else. +The base package can be compiled as +cmake ../cmake -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../site-packages/torch/share/cmake/ +where /.../site-packages/torch/ is the path to a pip installation of torch -To be compiled as -cmake ../cmake/ -DPKG_ML-METATENSOR=ON -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON +The kokkos version should be compiled as +cmake ../cmake/ -DPKG_KOKKOS=ON -DKokkos_ENABLE_CUDA=ON -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../libtorch/share/cmake/ +where /.../libtorch/ is the path to a libtorch C++11 ABI distribution (which can be downloaded from https://pytorch.org/get-started/locally/). 
+The OpenMP version (as opposed to the CUDA version) can be enabled with -DKokkos_ENABLE_OPENMP=ON instead of -DKokkos_ENABLE_CUDA=ON -Run the example with +The consistency between the two interfaces can be checked with ../../../build/lmp -k on g 1 -pk kokkos newton on -in in.kokkos.metatensor -and compare its output with the non-kokkos interface +(or `t Nt` instead of `g 1` for an OpenMP run with Nt threads) +and the output can be compared with that of the plain metatensor interface ../../../build/lmp -in in.metatensor - - -cmake ../cmake -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../site-packages/torch/share/cmake/ -cmake ../cmake/ -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../libtorch/share/cmake/ diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index e96cbda17c8..bbe7bfe98fd 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -31,6 +31,7 @@ #include #ifndef KOKKOS_ENABLE_CUDA +// fake Kokkos::Cuda for non-CUDA builds namespace Kokkos { class Cuda {}; } // namespace Kokkos @@ -40,68 +41,39 @@ using namespace LAMMPS_NS; /* ---------------------------------------------------------------------- */ -template -MetatensorSystemAdaptorKokkos::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Pair* requestor, MetatensorSystemOptionsKokkos options): +template +MetatensorSystemAdaptorKokkos::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Pair* requestor, MetatensorSystemOptionsKokkos options): Pointers(lmp), list_(nullptr), options_(std::move(options)), caches_(), atomic_types_(torch::zeros({0}, torch::TensorOptions().dtype(torch::kInt32))) { - torch::Device device = torch::kCPU; - if (std::is_same::value) { - device = torch::kCUDA; - } else { - device = torch::kCPU; - } - // We ask LAMMPS for a full neighbor lists because we need to know about // ALL pairs, even if options->full_list() is false. 
We will then filter // the pairs to only include each pair once where needed. auto request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); request->set_id(0); request->set_cutoff(options_.interaction_range); - request->set_kokkos_host(0); - request->set_kokkos_device(1); - - this->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(device).requires_grad(true)); -} - -template -MetatensorSystemAdaptorKokkos::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Compute* requestor, MetatensorSystemOptionsKokkos options): - Pointers(lmp), - list_(nullptr), - options_(std::move(options)), - caches_(), - atomic_types_(torch::zeros({0}, torch::TensorOptions().dtype(torch::kInt32))) -{ - torch::Device device = torch::kCPU; - if (std::is_same::value) { - device = torch::kCUDA; - } else { - device = torch::kCPU; - } - - auto request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST); - request->set_id(0); - request->set_cutoff(options_.interaction_range); - - this->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(device).requires_grad(true)); + // set whether the kokkos NL should be calculated on host or device + request->set_kokkos_host(std::is_same_v && + !std::is_same_v); + request->set_kokkos_device(std::is_same_v); } -template -MetatensorSystemAdaptorKokkos::~MetatensorSystemAdaptorKokkos() { +template +MetatensorSystemAdaptorKokkos::~MetatensorSystemAdaptorKokkos() { } -template -void MetatensorSystemAdaptorKokkos::init_list(int id, NeighList* ptr) { +template +void MetatensorSystemAdaptorKokkos::init_list(int id, NeighList* ptr) { assert(id == 0); list_ = ptr; } -template -void MetatensorSystemAdaptorKokkos::add_nl_request(double cutoff, metatensor_torch::NeighborListOptions request) { +template +void MetatensorSystemAdaptorKokkos::add_nl_request(double cutoff, metatensor_torch::NeighborListOptions request) { if (cutoff > options_.interaction_range) { 
error->all(FLERR, "Invalid metatensor model: one of the requested neighbor lists " @@ -127,26 +99,21 @@ void MetatensorSystemAdaptorKokkos::add_nl_request(double cutoff, } -template -void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metatensor_torch::System& system) { - // auto start = std::chrono::high_resolution_clock::now(); - // auto end = std::chrono::high_resolution_clock::now(); - +template +void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metatensor_torch::System& system) { auto dtype = system->positions().scalar_type(); auto device = system->positions().device(); - auto positions_kokkos = this->atomKK->k_x. template view(); + auto positions_kokkos = this->atomKK->k_x. template view(); auto total_n_atoms = atomKK->nlocal + atomKK->nghost; - - // torch::cuda::synchronize(); - // start = std::chrono::high_resolution_clock::now(); - /*-------------- whatever, this will be done on CPU for now ------------------------*/ - // The cost of this section seems to be very low - + /*-------------- this will be done on CPU for now ------------------------*/ // There is no kokkos cell in LAMMPS, so we need to transfer auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype); + // The hashmap in the following code is not easy to implement in either Kokkos or torch + // The cost of this section seems to be very low anyway + // Collect the local atom id of all local & ghosts atoms, mapping ghosts // atoms which are periodic images of local atoms back to the local atoms. 
// @@ -195,49 +162,43 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten ); original_atom_id_tensor = original_atom_id_tensor.to(device); // RIP - // torch::cuda::synchronize(); - // end = std::chrono::high_resolution_clock::now(); - // std::cout << " ghost mapping (CPU): " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; - - /*----------- end of whatever, this will be done on CPU for now --------------*/ + /*----------- end of "this will be done on CPU for now" --------------*/ - // torch::cuda::synchronize(); - // start = std::chrono::high_resolution_clock::now(); - NeighListKokkos* list_kk = static_cast*>(this->list_); + NeighListKokkos* list_kk = static_cast*>(this->list_); auto numneigh_kk = list_kk->d_numneigh; - auto neighbors_kk = list_kk->d_neighbors_transpose; + auto neighbors_kk = list_kk->d_neighbors_transpose; // transpose to have the same memory format as torch. This was requested in PairMetatensorKokkos::settings auto ilist_kk = list_kk->d_ilist; auto max_number_of_neighbors = list_kk->maxneighs; - // mask neighbors_kk with NEIGHMASK - Kokkos::View neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors); + // mask neighbors_kk with NEIGHMASK. 
Torch doesn't have this functionality, we do it in Kokkos + Kokkos::View neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors); Kokkos::parallel_for("mask_neigh", total_n_atoms*max_number_of_neighbors, KOKKOS_LAMBDA(int i) { auto local_i = i / max_number_of_neighbors; auto local_j = i % max_number_of_neighbors; neighbors_kk_masked(local_i, local_j) = neighbors_kk(local_i, local_j) & NEIGHMASK; }); + // Convert NL-related data to torch tensors auto numneigh_torch = torch::from_blob( numneigh_kk.data(), {total_n_atoms}, torch::TensorOptions().dtype(torch::kInt32).device(device) ); - auto neighbors_torch = torch::from_blob( neighbors_kk_masked.data(), {total_n_atoms, max_number_of_neighbors}, torch::TensorOptions().dtype(torch::kInt32).device(device) ); - auto ilist_torch = torch::from_blob( ilist_kk.data(), {total_n_atoms}, torch::TensorOptions().dtype(torch::kInt32).device(device) ); + // convert from LAMMPS NL format to metatensor NL format auto expanded_arange = torch::arange(max_number_of_neighbors, torch::TensorOptions().dtype(torch::kInt32).device(device)).unsqueeze(0).expand({total_n_atoms, -1}); auto neighbor_2d_mask = expanded_arange < numneigh_torch.unsqueeze(1); @@ -247,42 +208,47 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten auto neighbors_tensor = neighbors_torch.masked_select(neighbor_2d_mask); - // torch::cuda::synchronize(); - // end = std::chrono::high_resolution_clock::now(); - // std::cout << " NL format conversion: " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; - - // torch::cuda::synchronize(); - // start = std::chrono::high_resolution_clock::now(); - // change centers and neighbors to the original atom ids auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor); auto neighbors_tensor_original_id = original_atom_id_tensor.index_select(0, neighbors_tensor); - // create torch tensor with the positions (TEMPORARY, TODO: change) 
+ // create torch tensor with the positions auto positions_tensor = torch::from_blob( positions_kokkos.data(), {total_n_atoms, 3}, torch::TensorOptions().dtype(torch::kFloat64).device(device) ).to(dtype); + // The following code is a direct translation of the code in the non-Kokkos version (MetaTensorSystemAdaptor::setup_neighbors_remap), + // but rewritten in torch to use the GPU for (auto& cache: caches_) { // half list mask, if necessary (TODO: change names! This could modify the tensors outside the loop if more than one NL!) auto full_list = cache.options->full_list(); - if (!full_list) { + + torch::Tensor centers_tensor_original_id_full_or_half; + torch::Tensor neighbors_tensor_original_id_full_or_half; + torch::Tensor centers_tensor_full_or_half; + torch::Tensor neighbors_tensor_full_or_half; + if (full_list) { + centers_tensor_full_or_half = centers_tensor; + neighbors_tensor_full_or_half = neighbors_tensor; + centers_tensor_original_id_full_or_half = centers_tensor_original_id; + neighbors_tensor_original_id_full_or_half = neighbors_tensor_original_id; + } else { auto half_list_mask = centers_tensor_original_id <= neighbors_tensor_original_id; - centers_tensor = centers_tensor.masked_select(half_list_mask); - neighbors_tensor = neighbors_tensor.masked_select(half_list_mask); - centers_tensor_original_id = centers_tensor_original_id.masked_select(half_list_mask); - neighbors_tensor_original_id = neighbors_tensor_original_id.masked_select(half_list_mask); + centers_tensor_full_or_half = centers_tensor.masked_select(half_list_mask); + neighbors_tensor_full_or_half = neighbors_tensor.masked_select(half_list_mask); + centers_tensor_original_id_full_or_half = centers_tensor_original_id.masked_select(half_list_mask); + neighbors_tensor_original_id_full_or_half = neighbors_tensor_original_id.masked_select(half_list_mask); } // distance mask - auto interatomic_vectors = positions_tensor.index_select(0, neighbors_tensor) - positions_tensor.index_select(0, 
centers_tensor); + auto interatomic_vectors = positions_tensor.index_select(0, neighbors_tensor_full_or_half) - positions_tensor.index_select(0, centers_tensor_full_or_half); auto distance_mask = torch::sum(interatomic_vectors.pow(2), 1) < cache.cutoff*cache.cutoff; // index everything with the mask - auto centers_tensor_original_id_filtered = centers_tensor_original_id.masked_select(distance_mask); - auto neighbors_tensor_original_id_filtered = neighbors_tensor_original_id.masked_select(distance_mask); + auto centers_tensor_original_id_filtered = centers_tensor_original_id_full_or_half.masked_select(distance_mask); + auto neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_full_or_half.masked_select(distance_mask); auto interatomic_vectors_filtered = interatomic_vectors.index({distance_mask, torch::indexing::Slice()}); // find filtered interatomic vectors using the original atoms @@ -293,10 +259,19 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten auto cell_shifts = pair_shifts.matmul(cell_inv_tensor); cell_shifts = torch::round(cell_shifts).to(torch::kInt32); - if (!full_list) { + torch::Tensor centers_tensor_original_id_filtered_full_or_half; + torch::Tensor neighbors_tensor_original_id_filtered_full_or_half; + torch::Tensor interatomic_vectors_filtered_full_or_half; + torch::Tensor cell_shifts_full_or_half; + if (full_list) { + centers_tensor_original_id_filtered_full_or_half = centers_tensor_original_id_filtered; + neighbors_tensor_original_id_filtered_full_or_half = neighbors_tensor_original_id_filtered; + interatomic_vectors_filtered_full_or_half = interatomic_vectors_filtered; + cell_shifts_full_or_half = cell_shifts; + } else { auto half_list_cell_mask = centers_tensor_original_id_filtered == neighbors_tensor_original_id_filtered; auto negative_half_space_mask = torch::sum(cell_shifts, 1) < 0; - // reproduce this mask with torch: + // reproduce this mask (from MetaTensorSystemAdaptor::setup_neighbors_remap) with torch: 
// if ((shift[0] + shift[1] + shift[2] == 0) && (shift[2] < 0 || (shift[2] == 0 && shift[1] < 0))) auto edge_mask = ( torch::sum(cell_shifts, 1) == 0 & ( @@ -307,16 +282,14 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten ) ); auto final_mask = torch::logical_not(half_list_cell_mask & (negative_half_space_mask | edge_mask)); - centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.masked_select(final_mask); - neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.masked_select(final_mask); - interatomic_vectors_filtered = interatomic_vectors_filtered.index({final_mask, torch::indexing::Slice()}); - cell_shifts = cell_shifts.index({final_mask, torch::indexing::Slice()}); + centers_tensor_original_id_filtered_full_or_half = centers_tensor_original_id_filtered.masked_select(final_mask); + neighbors_tensor_original_id_filtered_full_or_half = neighbors_tensor_original_id_filtered.masked_select(final_mask); + interatomic_vectors_filtered_full_or_half = interatomic_vectors_filtered.index({final_mask, torch::indexing::Slice()}); + cell_shifts_full_or_half = cell_shifts.index({final_mask, torch::indexing::Slice()}); } - centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.unsqueeze(-1); - neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.unsqueeze(-1); - auto samples_values = torch::concatenate({centers_tensor_original_id_filtered, neighbors_tensor_original_id_filtered, cell_shifts}, 1); - + // make sure all the sample are unique + auto samples_values = torch::concatenate({centers_tensor_original_id_filtered_full_or_half.unsqueeze(-1), neighbors_tensor_original_id_filtered_full_or_half.unsqueeze(-1), cell_shifts_full_or_half}, 1); auto [samples_values_unique, samples_inverse, _] = torch::unique_dim( samples_values, /*dim=*/0, /*sorted=*/true, /*return_inverse=*/true, /*return_counts=*/false ); @@ -328,24 +301,14 @@ void 
MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten auto sample_indices = torch::empty(samples_values_unique.size(0), samples_inverse.options()); sample_indices.scatter_(0, samples_inverse, permutation); - // torch::cuda::synchronize(); - // end = std::chrono::high_resolution_clock::now(); - // std::cout << " NL filtering: " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; - - // torch::cuda::synchronize(); - // start = std::chrono::high_resolution_clock::now(); - + // wrap into metatensor data structures auto samples = torch::make_intrusive( std::vector{"first_atom", "second_atom", "cell_shift_a", "cell_shift_b", "cell_shift_c"}, samples_values_unique ); - // torch::cuda::synchronize(); - // end = std::chrono::high_resolution_clock::now(); - // std::cout << " Time to create big labels: " << std::chrono::duration_cast(end - start).count() / 1000.0 << "ms" << std::endl; - auto neighbor_list = torch::make_intrusive( - interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1), + interatomic_vectors_filtered_full_or_half.index_select(0, sample_indices).unsqueeze(-1), samples->to(device), std::vector{ metatensor_torch::LabelsHolder::create({"xyz"}, {{0}, {1}, {2}})->to(device), @@ -360,200 +323,14 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metaten } -template -void MetatensorSystemAdaptorKokkos::setup_neighbors_no_remap(metatensor_torch::System& system) { - auto dtype = system->positions().scalar_type(); - auto device = system->positions().device(); - - auto positions_kokkos = this->atomKK->k_x. 
template view(); - auto total_n_atoms = atomKK->nlocal + atomKK->nghost; - - auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype); - // it might be a good idea to have this as float32 if the model is using float32 - // to speed up the computation, especially on GPU - - - /*-------------- whatever, this will be done on CPU for now ------------------------*/ - - // Collect the local atom id of all local & ghosts atoms, mapping ghosts - // atoms which are periodic images of local atoms back to the local atoms. - // - // Metatensor expects pairs corresponding to periodic atoms to be between - // the main atoms, but using the actual distance vector between the atom and - // the ghost. - original_atom_id_.clear(); - original_atom_id_.reserve(total_n_atoms); - - // identify all local atom by their LAMMPS atom tag. - local_atoms_tags_.clear(); - for (int i=0; inlocal; i++) { - original_atom_id_.emplace_back(i); - local_atoms_tags_.emplace(atom->tag[i], i); - } - - // now loop over ghosts & map them back to the main cell if needed - ghost_atoms_tags_.clear(); - for (int i=atom->nlocal; itag[i]; - auto it = local_atoms_tags_.find(tag); - if (it != local_atoms_tags_.end()) { - // this is the periodic image of an atom already owned by this domain - original_atom_id_.emplace_back(it->second); - } else { - // this can either be a periodic image of an atom owned by another - // domain, or directly an atom from another domain. 
Since we can not - // really distinguish between these, we take the first atom as the - // "main" one and remap all atoms with the same tag to the first one - auto it = ghost_atoms_tags_.find(tag); - if (it != ghost_atoms_tags_.end()) { - // we already found this atom elsewhere in the system - original_atom_id_.emplace_back(it->second); - } else { - // this is the first time we are seeing this atom - original_atom_id_.emplace_back(i); - ghost_atoms_tags_.emplace(tag, i); - } - } - } - /*----------- end of whatever, this will be done on CPU for now --------------*/ - - auto original_atom_id_tensor = torch::from_blob( - original_atom_id_.data(), - {total_n_atoms}, - torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU) - ); - original_atom_id_tensor = original_atom_id_tensor.to(device); // RIP - - // Accumulate total number of pairs - int total_number_of_pairs = 0; - for (int ii=0; ii<(list_->inum + list_->gnum); ii++) { - total_number_of_pairs += list_->numneigh[ii]; - } - std::vector centers(total_number_of_pairs); - std::vector neighbors(total_number_of_pairs); - - // Fill the centers and neighbors arrays with the original atom ids - int pair_index = 0; - for (int ii=0; ii<(list_->inum + list_->gnum); ii++) { - auto atom_i = list_->ilist[ii]; - auto neighbors_ii = list_->firstneigh[ii]; - for (int jj=0; jjnumneigh[ii]; jj++) { - centers[pair_index] = atom_i; - neighbors[pair_index] = neighbors_ii[jj] & NEIGHMASK; - pair_index++; - } - } - - // Create torch tensors for the centers and neighbors arrays - auto centers_tensor = torch::from_blob( - centers.data(), - {total_number_of_pairs}, - torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU) - ); - centers_tensor = centers_tensor.to(device); - auto neighbors_tensor = torch::from_blob( - neighbors.data(), - {total_number_of_pairs}, - torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU) - ); - neighbors_tensor = neighbors_tensor.to(device); - - // change centers and neighbors to the 
original atom ids - auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor); - auto neighbors_tensor_original_id = original_atom_id_tensor.index_select(0, neighbors_tensor); - - // create torch tensor with the positions (TEMPORARY, TODO: change) - auto positions_tensor = torch::from_blob( - positions_kokkos.data(), - {total_n_atoms, 3}, - torch::TensorOptions().dtype(torch::kFloat64).device(device) - ).to(dtype); - - for (auto& cache: caches_) { - // half list mask, if necessary (TODO: change names! This could modify the tensors outside the loop if more than one NL!) - auto full_list = cache.options->full_list(); - if (!full_list) { - auto half_list_mask = centers_tensor_original_id <= neighbors_tensor_original_id; - centers_tensor = centers_tensor.masked_select(half_list_mask); - neighbors_tensor = neighbors_tensor.masked_select(half_list_mask); - centers_tensor_original_id = centers_tensor_original_id.masked_select(half_list_mask); - neighbors_tensor_original_id = neighbors_tensor_original_id.masked_select(half_list_mask); - } - - // distance mask - auto interatomic_vectors = positions_tensor.index_select(0, neighbors_tensor) - positions_tensor.index_select(0, centers_tensor); - auto distance_mask = torch::sum(interatomic_vectors.pow(2), 1) < cache.cutoff*cache.cutoff; - - // index everything with the mask - auto centers_tensor_original_id_filtered = centers_tensor_original_id.masked_select(distance_mask); - auto neighbors_tensor_original_id_filtered = neighbors_tensor_original_id.masked_select(distance_mask); - auto interatomic_vectors_filtered = interatomic_vectors.index({distance_mask, torch::indexing::Slice()}); - - // find filtered interatomic vectors using the original atoms - auto interatomic_vectors_original_filtered = positions_tensor.index_select(0, neighbors_tensor_original_id_filtered) - positions_tensor.index_select(0, centers_tensor_original_id_filtered); - - // cell shifts - auto pair_shifts = 
interatomic_vectors_filtered - interatomic_vectors_original_filtered; - auto cell_shifts = pair_shifts.matmul(cell_inv_tensor); - cell_shifts = torch::round(cell_shifts).to(torch::kInt32); - - if (!full_list) { - auto half_list_cell_mask = centers_tensor_original_id_filtered == neighbors_tensor_original_id_filtered; - auto negative_half_space_mask = torch::sum(cell_shifts, 1) < 0; - // reproduce this mask with torch: - // if ((shift[0] + shift[1] + shift[2] == 0) && (shift[2] < 0 || (shift[2] == 0 && shift[1] < 0))) - auto edge_mask = ( - torch::sum(cell_shifts, 1) == 0 & ( - cell_shifts.index({torch::indexing::Slice(), 2}) < 0 | ( - cell_shifts.index({torch::indexing::Slice(), 2}) == 0 & - cell_shifts.index({torch::indexing::Slice(), 1}) < 0 - ) - ) - ); - auto final_mask = torch::logical_not(half_list_cell_mask & (negative_half_space_mask | edge_mask)); - centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.masked_select(final_mask); - neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.masked_select(final_mask); - interatomic_vectors_filtered = interatomic_vectors_filtered.index({final_mask, torch::indexing::Slice()}); - cell_shifts = cell_shifts.index({final_mask, torch::indexing::Slice()}); - } - - centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.unsqueeze(-1); - neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.unsqueeze(-1); - auto samples_values = torch::concatenate({centers_tensor_original_id_filtered, neighbors_tensor_original_id_filtered, cell_shifts}, 1); - - auto [samples_values_unique, samples_inverse, _] = torch::unique_dim( - samples_values, /*dim=*/0, /*sorted=*/true, /*return_inverse=*/true, /*return_counts=*/false - ); - - auto permutation = torch::arange(samples_inverse.size(0), samples_inverse.options()); - samples_inverse = samples_inverse.flip({0}); - permutation = permutation.flip({0}); - - auto sample_indices = 
torch::empty(samples_values_unique.size(0), samples_inverse.options()); - sample_indices.scatter_(0, samples_inverse, permutation); - - auto samples = torch::make_intrusive( - std::vector{"first_atom", "second_atom", "cell_shift_a", "cell_shift_b", "cell_shift_c"}, - samples_values_unique - ); - - auto neighbor_list = torch::make_intrusive( - interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1).to(dtype).to(device), - samples->to(device), - std::vector{ - metatensor_torch::LabelsHolder::create({"xyz"}, {{0}, {1}, {2}})->to(device), - }, - metatensor_torch::LabelsHolder::create({"distance"}, {{0}})->to(device) - ); - - metatensor_torch::register_autograd_neighbors(system, neighbor_list, options_.check_consistency); - system->add_neighbor_list(cache.options, neighbor_list); - } +template +void MetatensorSystemAdaptorKokkos::setup_neighbors_no_remap(metatensor_torch::System& system) { + throw std::runtime_error("The metatensor/kk requires remap_pairs to be true"); } -template -metatensor_torch::System MetatensorSystemAdaptorKokkos::system_from_lmp( +template +metatensor_torch::System MetatensorSystemAdaptorKokkos::system_from_lmp( bool do_virial, bool remap_pairs, torch::ScalarType dtype, @@ -561,9 +338,9 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos::system_fr ) { auto total_n_atoms = atomKK->nlocal + atomKK->nghost; - auto atom_types_lammps_kokkos = atomKK->k_type.view(); + auto atom_types_lammps_kokkos = atomKK->k_type.view(); auto mapping = options_.types_mapping_kokkos; - Kokkos::View atom_types_metatensor_kokkos("atom_types_metatensor", total_n_atoms); /// Can be a class member? 
(allocation alert) + Kokkos::View atom_types_metatensor_kokkos("atom_types_metatensor", total_n_atoms); Kokkos::parallel_for( "MetatensorSystemAdaptorKokkos::system_from_lmp::atom_types_mapping", @@ -577,32 +354,30 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos::system_fr atom_types_metatensor_kokkos.data(), {total_n_atoms}, torch::TensorOptions().dtype(torch::kInt32).device(device) - ).clone(); /// Again, allocation alert. Not sure if this can be avoided + ).clone(); // clone because the original memory belongs to Kokkos and will be deallocated // atom->x contains "real" and then ghost atoms, in that order - auto positions_kokkos = atomKK->k_x.view(); + auto positions_kokkos = atomKK->k_x.view(); auto tensor_options_positions = torch::TensorOptions().dtype(torch::kFloat64).device(device); this->positions = torch::from_blob( positions_kokkos.data(), {total_n_atoms, 3}, // requires_grad=true since we always need gradients w.r.t. positions tensor_options_positions - ).clone().requires_grad_(true); /// Allocation alert (clone) + ).clone().requires_grad_(true); // clone (same as above) auto tensor_options_cell = torch::TensorOptions().dtype(dtype).device(device); - auto cell = torch::zeros({3, 3}, tensor_options_cell); /// Allocation alert, we could make it a class member and allocate it once - /// domain doesn't seem to have a Kokkos version + auto cell = torch::zeros({3, 3}, tensor_options_cell); // we could make it a class member and allocate it once + + // domain doesn't seem to have a Kokkos version. We will need to transfer the cell to the device cell[0][0] = domain->xprd; - cell[1][0] = domain->xy; cell[1][1] = domain->yprd; - cell[2][0] = domain->xz; cell[2][1] = domain->yz; cell[2][2] = domain->zprd; - /// And the other elements? Are they always zero? auto system_positions = this->positions.to(dtype); - cell = cell.to(dtype).to(device); /// to(device) alert. How do we find the cell on Kokkos? 
+ cell = cell.to(dtype).to(device); if (do_virial) { auto model_strain = this->strain.to(dtype); /// already on the correct device @@ -630,4 +405,8 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos::system_fr namespace LAMMPS_NS { template class MetatensorNeighborsDataKokkos; template class MetatensorSystemAdaptorKokkos; +#ifdef LMP_KOKKOS_GPU +template class MetatensorNeighborsDataKokkos; +template class MetatensorSystemAdaptorKokkos; +#endif } diff --git a/src/KOKKOS/metatensor_system_kokkos.h b/src/KOKKOS/metatensor_system_kokkos.h index fbd788235af..3ebea22a1fd 100644 --- a/src/KOKKOS/metatensor_system_kokkos.h +++ b/src/KOKKOS/metatensor_system_kokkos.h @@ -28,11 +28,11 @@ namespace LAMMPS_NS { -template +template struct MetatensorSystemOptionsKokkos { // Mapping from LAMMPS types to metatensor types const int32_t* types_mapping; - const Kokkos::View types_mapping_kokkos; + const Kokkos::View types_mapping_kokkos; // interaction range of the model, in LAMMPS units double interaction_range; // should we run extra checks on the neighbor lists? 
@@ -40,7 +40,7 @@ struct MetatensorSystemOptionsKokkos { }; // data for metatensor neighbors lists -template +template struct MetatensorNeighborsDataKokkos { // single neighbors sample containing [i, j, S_a, S_b, S_c] using sample_t = std::array; @@ -79,11 +79,11 @@ struct MetatensorNeighborsDataKokkos { std::vector> distances_f32; }; -template +template class MetatensorSystemAdaptorKokkos : public Pointers { public: - MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Pair* requestor, MetatensorSystemOptionsKokkos options); - MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Compute* requestor, MetatensorSystemOptionsKokkos options); + MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Pair* requestor, MetatensorSystemOptionsKokkos options); + MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Compute* requestor, MetatensorSystemOptionsKokkos options); ~MetatensorSystemAdaptorKokkos(); @@ -123,13 +123,13 @@ class MetatensorSystemAdaptorKokkos : public Pointers { private: // options for this system adaptor - MetatensorSystemOptionsKokkos options_; + MetatensorSystemOptionsKokkos options_; // LAMMPS NL NeighList* list_; // allocations caches for all the NL requested by // the model - std::vector> caches_; + std::vector> caches_; // allocation cache for the atomic types in the system torch::Tensor atomic_types_; // allocation cache holding the "original atom" id for all atoms in the diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp index 5cabe04f376..c51d90de88e 100644 --- a/src/KOKKOS/pair_metatensor_kokkos.cpp +++ b/src/KOKKOS/pair_metatensor_kokkos.cpp @@ -57,6 +57,7 @@ class Cuda {}; using namespace LAMMPS_NS; +template struct LAMMPS_NS::PairMetatensorDataKokkos { PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit); @@ -80,10 +81,11 @@ struct LAMMPS_NS::PairMetatensorDataKokkos { double max_cutoff; // adaptor from LAMMPS system to metatensor's - std::unique_ptr> system_adaptor; + std::unique_ptr> system_adaptor; }; 
-PairMetatensorDataKokkos::PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit): +template +PairMetatensorDataKokkos::PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit): system_adaptor(nullptr), device(torch::kCPU), check_consistency(false), @@ -106,7 +108,8 @@ PairMetatensorDataKokkos::PairMetatensorDataKokkos(std::string length_unit, std: this->evaluation_options->outputs.insert("energy", output); } -void PairMetatensorDataKokkos::load_model( +template +void PairMetatensorDataKokkos::load_model( LAMMPS* lmp, const char* path, const char* extensions_directory @@ -131,7 +134,7 @@ void PairMetatensorDataKokkos::load_model( } auto capabilities_ivalue = this->model->run_method("capabilities"); - this->capabilities = capabilities_ivalue.toCustomClass(); + this->capabilities = capabilities_ivalue. template toCustomClass(); if (!this->capabilities->outputs().contains("energy")) { lmp->error->all(FLERR, "the model at '{}' does not have an \"energy\" output, we can not use it in pair_style metatensor", path); @@ -139,7 +142,7 @@ void PairMetatensorDataKokkos::load_model( if (lmp->comm->me == 0) { auto metadata_ivalue = this->model->run_method("metadata"); - auto metadata = metadata_ivalue.toCustomClass(); + auto metadata = metadata_ivalue. 
template toCustomClass(); auto to_print = metadata->print(); if (lmp->screen) { @@ -161,8 +164,8 @@ void PairMetatensorDataKokkos::load_model( /* ---------------------------------------------------------------------- */ -template -PairMetatensorKokkos::PairMetatensorKokkos(LAMMPS *lmp): Pair(lmp), type_mapping(nullptr) { +template +PairMetatensorKokkos::PairMetatensorKokkos(LAMMPS *lmp): Pair(lmp), type_mapping(nullptr) { std::string energy_unit; std::string length_unit; if (strcmp(update->unit_style, "real") == 0) { @@ -185,11 +188,11 @@ PairMetatensorKokkos::PairMetatensorKokkos(LAMMPS *lmp): Pair(lmp // so we can not compute virial as fdotr this->no_virial_fdotr_compute = 1; - this->mts_data = new PairMetatensorDataKokkos(std::move(length_unit), std::move(energy_unit)); + this->mts_data = new PairMetatensorDataKokkos(std::move(length_unit), std::move(energy_unit)); } -template -PairMetatensorKokkos::~PairMetatensorKokkos() { +template +PairMetatensorKokkos::~PairMetatensorKokkos() { delete this->mts_data; if (allocated) { @@ -200,8 +203,8 @@ PairMetatensorKokkos::~PairMetatensorKokkos() { } // called when finding `pair_style metatensor` in the input -template -void PairMetatensorKokkos::settings(int argc, char ** argv) { +template +void PairMetatensorKokkos::settings(int argc, char ** argv) { if (argc == 0) { error->all(FLERR, "expected at least 1 argument to pair_style metatensor, got {}", argc); } @@ -337,13 +340,13 @@ void PairMetatensorKokkos::settings(int argc, char ** argv) { mts_data->model->to(mts_data->device); // Handle potential mismatch between Kokkos and model devices - if (std::is_same::value) { + if (std::is_same::value) { if (!mts_data->device.is_cuda()) { throw std::runtime_error("Kokkos is running on a GPU, but the model is not on a GPU"); } } else { if (!mts_data->device.is_cpu()) { - throw std::runtime_error("Kokkos is running on the host, but the model is not on CPU"); + throw std::runtime_error("Kokkos is running on CPU, but the model is 
not on CPU"); } } @@ -362,12 +365,12 @@ void PairMetatensorKokkos::settings(int argc, char ** argv) { // this will allow us to receive the NL in a GPU-friendly format this->lmp->kokkos->neigh_transpose = 1; - std::cout << "Running on " << typeid(ExecutionSpaceFromDevice::space).name() << std::endl; + std::cout << "Running on " << typeid(ExecutionSpaceFromDevice::space).name() << std::endl; } -template -void PairMetatensorKokkos::allocate() { +template +void PairMetatensorKokkos::allocate() { allocated = 1; // setflags stores whether the coeff for a given pair of atom types are known @@ -411,15 +414,15 @@ void PairMetatensorKokkos::allocate() { } } -template -double PairMetatensorKokkos::init_one(int, int) { +template +double PairMetatensorKokkos::init_one(int, int) { return mts_data->max_cutoff; } // called on pair_coeff -template -void PairMetatensorKokkos::coeff(int argc, char ** argv) { +template +void PairMetatensorKokkos::coeff(int argc, char ** argv) { if (argc < 3 || strcmp(argv[0], "*") != 0 || strcmp(argv[1], "*") != 0) { error->all(FLERR, "invalid pair_coeff, expected `pair_coeff * * `"); } @@ -447,8 +450,8 @@ void PairMetatensorKokkos::coeff(int argc, char ** argv) { // called when the run starts -template -void PairMetatensorKokkos::init_style() { +template +void PairMetatensorKokkos::init_style() { // Require newton pair on since we need to communicate forces accumulated on // ghost atoms to neighboring domains. These forces contributions come from // gradient of a local descriptor w.r.t. domain ghosts (periodic images @@ -473,7 +476,7 @@ void PairMetatensorKokkos::init_style() { // determine the maximal cutoff in the NL auto requested_nl = mts_data->model->run_method("requested_neighbor_lists"); for (const auto& ivalue: requested_nl.toList()) { - auto options = ivalue.get().toCustomClass(); + auto options = ivalue.get(). 
template toCustomClass(); auto cutoff = options->engine_cutoff(mts_data->evaluation_options->length_unit()); mts_data->max_cutoff = std::max(mts_data->max_cutoff, cutoff); @@ -490,28 +493,30 @@ void PairMetatensorKokkos::init_style() { } /// create Kokkos view for type_mapping - Kokkos::View type_mapping_kokkos("type_mapping", atom->ntypes + 1); + Kokkos::View type_mapping_kokkos("type_mapping", atomKK->ntypes + 1); /// copy type_mapping to the Kokkos view (via a host mirror view) auto type_mapping_kokkos_host = Kokkos::create_mirror_view(type_mapping_kokkos); - for (int i = 0; i < atom->ntypes + 1; i++) { + for (int i = 0; i < atomKK->ntypes + 1; i++) { type_mapping_kokkos_host(i) = type_mapping[i]; } Kokkos::deep_copy(type_mapping_kokkos, type_mapping_kokkos_host); // create system adaptor - auto options = MetatensorSystemOptionsKokkos{ + auto options = MetatensorSystemOptionsKokkos{ this->type_mapping, type_mapping_kokkos, mts_data->max_cutoff, mts_data->check_consistency, }; - mts_data->system_adaptor = std::make_unique>(lmp, this, options); + mts_data->system_adaptor = std::make_unique>(lmp, this, options); + // set up the strain on the system adaptor to the correct device to avoid an unnecessary transfer at each step + this->mts_data->system_adaptor->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(mts_data->device).requires_grad(true)); // Translate from the metatensor neighbor lists requests to LAMMPS neighbor // lists requests. auto requested_nl = mts_data->model->run_method("requested_neighbor_lists"); for (const auto& ivalue: requested_nl.toList()) { - auto options = ivalue.get().toCustomClass(); + auto options = ivalue.get(). 
template toCustomClass(); auto cutoff = options->engine_cutoff(mts_data->evaluation_options->length_unit()); assert(cutoff <= mts_data->max_cutoff); @@ -520,31 +525,17 @@ void PairMetatensorKokkos::init_style() { } -template -void PairMetatensorKokkos::init_list(int id, NeighList *ptr) { +template +void PairMetatensorKokkos::init_list(int id, NeighList *ptr) { mts_data->system_adaptor->init_list(id, ptr); } -template -void PairMetatensorKokkos::compute(int eflag, int vflag) { - // auto start = std::chrono::high_resolution_clock::now(); - // auto end = std::chrono::high_resolution_clock::now(); - - // auto x = atomKK->k_x.view(); - // auto h_array = Kokkos::create_mirror_view(d_array); - // Kokkos::deep_copy(h_array, d_array); - // // Print the values on the host - // for (int i = 0; i < 32; ++i) { - // for (int j = 0; j < 3; ++j) { - // std::cout << h_array(i, j) << " "; - // } - // std::cout << std::endl; - // } - +template +void PairMetatensorKokkos::compute(int eflag, int vflag) { /// Declare what we need to read from the atomKK object and what we will modify - this->atomKK->sync(ExecutionSpaceFromDevice::space, X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK); - this->atomKK->modified(ExecutionSpaceFromDevice::space, ENERGY_MASK | F_MASK | VIRIAL_MASK); + this->atomKK->sync(ExecutionSpaceFromDevice::space, X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK); + this->atomKK->modified(ExecutionSpaceFromDevice::space, ENERGY_MASK | F_MASK | VIRIAL_MASK); if (eflag || vflag) { ev_setup(eflag, vflag); @@ -567,23 +558,16 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { error->all(FLERR, "the model requested an unsupported dtype '{}'", mts_data->capabilities->dtype()); } - // torch::cuda::synchronize(); - // start = std::chrono::high_resolution_clock::now(); - // transform from LAMMPS to metatensor System auto system = mts_data->system_adaptor->system_from_lmp( static_cast(vflag_global), mts_data->remap_pairs, dtype, 
mts_data->device ); - // torch::cuda::synchronize(); - // end = std::chrono::high_resolution_clock::now(); - // std::cout << "sys-from-lmp: " << std::chrono::duration_cast(end - start).count() / 1000.0 << " ms" << std::endl; - // only run the calculation for atoms actually in the current domain auto tensor_options = torch::TensorOptions().dtype(torch::kInt32).device(mts_data->device); torch::Tensor selected_atoms_values = torch::stack({ - torch::zeros({atom->nlocal}, tensor_options), - torch::arange(atom->nlocal, tensor_options) + torch::zeros({atomKK->nlocal}, tensor_options), + torch::arange(atomKK->nlocal, tensor_options) }, -1); auto selected_atoms = torch::make_intrusive( @@ -592,10 +576,6 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { mts_data->evaluation_options->set_selected_atoms(selected_atoms); torch::IValue result_ivalue; - - // torch::cuda::synchronize(); - // start = std::chrono::high_resolution_clock::now(); - try { result_ivalue = mts_data->model->forward({ std::vector{system}, @@ -606,10 +586,6 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { error->all(FLERR, "error evaluating the torch model: {}", e.what()); } - // torch::cuda::synchronize(); - // end = std::chrono::high_resolution_clock::now(); - // std::cout << "Time taken forward: " << std::chrono::duration_cast(end - start).count() / 1000.0 << " ms" << std::endl; - auto result = result_ivalue.toGenericDict(); auto energy = result.at("energy").toCustomClass(); auto energy_tensor = metatensor_torch::TensorMapHolder::block_by_id(energy, 0)->values(); @@ -619,7 +595,7 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { torch::Tensor global_energy; if (eflag_atom) { auto energies = energy_detached.accessor(); - for (int i=0; inlocal + atom->nghost; i++) { + for (int i=0; inlocal + atomKK->nghost; i++) { // TODO: handle out of order samples eatom[i] += energies[i][0]; } @@ -640,24 +616,15 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { 
mts_data->system_adaptor->strain.mutable_grad() = torch::Tensor(); // compute forces/virial with backward propagation - - // torch::cuda::synchronize(); - // start = std::chrono::high_resolution_clock::now(); - energy_tensor.backward(-torch::ones_like(energy_tensor)); - // torch::cuda::synchronize(); - // end = std::chrono::high_resolution_clock::now(); - // std::cout << "Time taken backward: " << std::chrono::duration_cast(end - start).count() / 1000.0 << " ms" << std::endl; - auto forces_tensor = mts_data->system_adaptor->positions.grad(); assert(forces_tensor.scalar_type() == torch::kFloat64); - auto forces_lammps_kokkos = this->atomKK->k_f. template view(); - /// Is it possible to do double*[3] here? - auto forces_metatensor_kokkos = Kokkos::View>(forces_tensor.contiguous().data_ptr(), atom->nlocal + atom->nghost, 3); + auto forces_lammps_kokkos = this->atomKK->k_f. template view(); + auto forces_metatensor_kokkos = Kokkos::View>(forces_tensor.contiguous(). template data_ptr(), atomKK->nlocal + atomKK->nghost, 3); - Kokkos::parallel_for("PairMetatensorKokkos::compute::force_accumulation", atom->nlocal + atom->nghost, KOKKOS_LAMBDA(const int i) { + Kokkos::parallel_for("PairMetatensorKokkos::compute::force_accumulation", atomKK->nlocal + atomKK->nghost, KOKKOS_LAMBDA(const int i) { forces_lammps_kokkos(i, 0) += forces_metatensor_kokkos(i, 0); forces_lammps_kokkos(i, 1) += forces_metatensor_kokkos(i, 1); forces_lammps_kokkos(i, 2) += forces_metatensor_kokkos(i, 2); @@ -670,9 +637,9 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { assert(virial_tensor.scalar_type() == torch::kFloat64); // apparently the cell is not supported in Kokkos format, - // so it has to be updated on CPU (??) + // so it has to be updated on CPU auto predicted_virial_tensor_cpu = virial_tensor.cpu(); - auto predicted_virial = predicted_virial_tensor_cpu.accessor(); + auto predicted_virial = predicted_virial_tensor_cpu. 
template accessor(); virial[0] += predicted_virial[0][0]; virial[1] += predicted_virial[1][1]; @@ -690,5 +657,7 @@ void PairMetatensorKokkos::compute(int eflag, int vflag) { namespace LAMMPS_NS { template class PairMetatensorKokkos; -/// TODO: Host version +#ifdef LMP_KOKKOS_GPU +template class PairMetatensorKokkos; +#endif } diff --git a/src/KOKKOS/pair_metatensor_kokkos.h b/src/KOKKOS/pair_metatensor_kokkos.h index 8f5f144cec5..4dbd199810b 100644 --- a/src/KOKKOS/pair_metatensor_kokkos.h +++ b/src/KOKKOS/pair_metatensor_kokkos.h @@ -19,21 +19,21 @@ PairStyle(metatensor/kk, PairMetatensorKokkos); #ifndef LMP_PAIR_METATENSOR_KOKKOS_H #define LMP_PAIR_METATENSOR_KOKKOS_H -#include "kokkos_base.h" #include "pair_kokkos.h" namespace LAMMPS_NS { -template +template class MetatensorSystemAdaptorKokkos; +template struct PairMetatensorDataKokkos; /// I noticed that most other kokkos packages inherit from their non-kokkos /// counterparts. It doesn't look like a good idea to me because /// they end up overriding everything... Not doing it here for now. 
-template -class PairMetatensorKokkos : public Pair, public KokkosBase { +template +class PairMetatensorKokkos : public Pair { public: PairMetatensorKokkos(class LAMMPS *); ~PairMetatensorKokkos(); @@ -47,7 +47,7 @@ class PairMetatensorKokkos : public Pair, public KokkosBase { void allocate(); private: - PairMetatensorDataKokkos* mts_data; + PairMetatensorDataKokkos* mts_data; // mapping from LAMMPS types to metatensor types int32_t* type_mapping; From a2a358f4d915159869e8f678fe4241bb4265c4ea Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Thu, 24 Oct 2024 13:56:23 +0200 Subject: [PATCH 08/15] Partial clean-up of the examples folder --- .../metatensor/log.26Jun2024.metatensor.g++.1 | 135 ++ .../metatensor/log.26Jun2024.metatensor.g++.4 | 135 ++ examples/PACKAGES/metatensor/logg | 1415 ----------------- 3 files changed, 270 insertions(+), 1415 deletions(-) create mode 100644 examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.1 create mode 100644 examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.4 delete mode 100644 examples/PACKAGES/metatensor/logg diff --git a/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.1 b/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.1 new file mode 100644 index 00000000000..e400bfa0886 --- /dev/null +++ b/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.1 @@ -0,0 +1,135 @@ +LAMMPS (17 Apr 2024 - Development - patch_17Apr2024-557-gef1630afd2) + using 1 OpenMP thread(s) per MPI task +units metal +boundary p p p + +atom_style atomic +lattice fcc 3.6 +Lattice spacing in x,y,z = 3.6 3.6 3.6 +region box block 0 2 0 2 0 2 +create_box 1 box +Created orthogonal box = (0 0 0) to (7.2 7.2 7.2) + 1 by 1 by 1 MPI processor grid +create_atoms 1 box +Created 32 atoms + using lattice units in orthogonal box = (0 0 0) to (7.2 7.2 7.2) + create_atoms CPU = 0.000 seconds + +labelmap atom 1 Ni +mass Ni 58.693 + +velocity all create 123 42 + +pair_style metatensor nickel-lj.pt + +This is the Test Lennard-Jones 
model +==================================== + +Minimal shifted Lennard-Jones potential, to be used when testing the +integration of metatensor atomistic models with various simulation engines. + +Model authors +------------- + +- Guillaume Fraux + +Model references +---------------- + +Please cite the following references when using this model: +- about this specific model: + * https://github.com/luthaf/metatensor-lj-test +- about the implementation of this model: + * https://github.com/lab-cosmo/metatensor + +Running simulation on cpu device with float64 data +# pair_style metatensor nickel-lj-extensions.pt extensions collected-extensions/ +pair_coeff * * 28 + +timestep 0.001 +fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0 +fix 1 all npt temp 123 123 0.10000000000000000555 iso 0 0 $(1000 * dt) drag 1.0 +fix 1 all npt temp 123 123 0.10000000000000000555 iso 0 0 1 drag 1.0 + +thermo 10 +thermo_style custom step temp pe etotal press vol + +# dump 1 all atom 10 dump.metatensor + +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- https://github.com/lab-cosmo/metatensor +- https://github.com/luthaf/metatensor-lj-test +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 8.5 + ghost atom cutoff = 8.5 + binsize = 4.25, bins = 2 2 2 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair metatensor, perpetual + attributes: full, newton on, ghost + pair build: full/bin/ghost + stencil: full/ghost/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.377 | 3.377 | 3.377 Mbytes + Step Temp PotEng TotEng Press Volume + 0 123 -8.2814195 -7.7885506 -67585.536 373.248 + 10 124.8498 -8.395127 -7.8948458 -68884.117 370.7507 + 20 130.60229 -8.7447028 -8.221371 -72913.372 363.3685 + 30 140.95014 -9.3595663 -8.79477 -80162.936 351.36119 + 40 157.33663 -10.29346 -9.6630017 -91879.643 335.18851 + 50 181.74279 -11.619487 -10.891232 -108735.54 315.60425 + 60 216.75162 -13.405317 -12.536779 -131438.83 293.736 + 70 264.39963 -15.685874 -14.626408 -160402.97 270.99304 + 80 319.4713 -18.40371 -17.123568 -192237.03 248.74525 + 90 350.37789 -21.272294 -19.868307 -215596.99 227.98439 + 100 298.01005 -23.674365 -22.48022 -206922.9 209.26415 +Loop time of 4.01198 on 1 procs for 100 steps with 32 atoms + +Performance: 2.154 ns/day, 11.144 hours/ns, 24.925 timesteps/s, 797.611 atom-step/s +99.4% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 3.9962 | 3.9962 | 3.9962 | 0.0 | 99.61 +Neigh | 0.01366 | 0.01366 | 0.01366 | 0.0 | 0.34 +Comm | 0.00055756 | 0.00055756 | 0.00055756 | 0.0 | 0.01 +Output | 0.00016915 | 0.00016915 | 0.00016915 | 0.0 | 0.00 +Modify | 0.001224 | 0.001224 | 0.001224 | 0.0 | 0.03 +Other | | 0.00021 | | | 0.01 + +Nlocal: 32 ave 32 max 32 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1655 ave 1655 max 1655 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 11490 ave 11490 max 
11490 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 11490 +Ave neighs/atom = 359.0625 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:04 diff --git a/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.4 b/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.4 new file mode 100644 index 00000000000..d6dbff0d78e --- /dev/null +++ b/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.4 @@ -0,0 +1,135 @@ +LAMMPS (17 Apr 2024 - Development - patch_17Apr2024-557-gef1630afd2) + using 1 OpenMP thread(s) per MPI task +units metal +boundary p p p + +atom_style atomic +lattice fcc 3.6 +Lattice spacing in x,y,z = 3.6 3.6 3.6 +region box block 0 2 0 2 0 2 +create_box 1 box +Created orthogonal box = (0 0 0) to (7.2 7.2 7.2) + 1 by 2 by 2 MPI processor grid +create_atoms 1 box +Created 32 atoms + using lattice units in orthogonal box = (0 0 0) to (7.2 7.2 7.2) + create_atoms CPU = 0.001 seconds + +labelmap atom 1 Ni +mass Ni 58.693 + +velocity all create 123 42 + +pair_style metatensor nickel-lj.pt + +This is the Test Lennard-Jones model +==================================== + +Minimal shifted Lennard-Jones potential, to be used when testing the +integration of metatensor atomistic models with various simulation engines. 
+ +Model authors +------------- + +- Guillaume Fraux + +Model references +---------------- + +Please cite the following references when using this model: +- about this specific model: + * https://github.com/luthaf/metatensor-lj-test +- about the implementation of this model: + * https://github.com/lab-cosmo/metatensor + +Running simulation on cpu device with float64 data +# pair_style metatensor nickel-lj-extensions.pt extensions collected-extensions/ +pair_coeff * * 28 + +timestep 0.001 +fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0 +fix 1 all npt temp 123 123 0.10000000000000000555 iso 0 0 $(1000 * dt) drag 1.0 +fix 1 all npt temp 123 123 0.10000000000000000555 iso 0 0 1 drag 1.0 + +thermo 10 +thermo_style custom step temp pe etotal press vol + +# dump 1 all atom 10 dump.metatensor + +run 100 + +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Your simulation uses code contributions which should be cited: + +- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419 + +@Article{Gissinger24, + author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor}, + title = {Type Label Framework for Bonded Force Fields in LAMMPS}, + journal = {J. Phys. Chem. B}, + year = 2024, + volume = 128, + number = 13, + pages = {3282–-3297} +} + +- https://github.com/lab-cosmo/metatensor +- https://github.com/luthaf/metatensor-lj-test +CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE + +Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule +Neighbor list info ... 
+ update: every = 1 steps, delay = 0 steps, check = yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 8.5 + ghost atom cutoff = 8.5 + binsize = 4.25, bins = 2 2 2 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair metatensor, perpetual + attributes: full, newton on, ghost + pair build: full/bin/ghost + stencil: full/ghost/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.359 | 3.359 | 3.359 Mbytes + Step Temp PotEng TotEng Press Volume + 0 123 -8.2814195 -7.7885506 -67585.536 373.248 + 10 124.79957 -8.3949245 -7.8948446 -68883.161 370.7507 + 20 130.37558 -8.7437731 -8.2213497 -72909.223 363.36859 + 30 140.35202 -9.3570472 -8.7946476 -80147.272 351.36165 + 40 156.04321 -10.287834 -9.6625589 -91867.312 335.19011 + 50 179.24129 -11.608375 -10.890143 -108707.41 315.60758 + 60 212.26895 -13.385134 -12.534558 -131437.18 293.74143 + 70 257.12553 -15.653741 -14.623422 -160605.1 270.99882 + 80 309.97318 -18.367667 -17.125584 -193391.56 248.74226 + 90 345.50571 -21.286382 -19.901918 -220089.9 227.94171 + 100 318.19414 -23.90463 -22.629605 -220782.88 209.09539 +Loop time of 4.16296 on 4 procs for 100 steps with 32 atoms + +Performance: 2.075 ns/day, 11.564 hours/ns, 24.021 timesteps/s, 768.684 atom-step/s +97.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 3.8847 | 3.9666 | 4.0708 | 3.8 | 95.28 +Neigh | 0.008984 | 0.0098269 | 0.012274 | 1.4 | 0.24 +Comm | 0.060256 | 0.16685 | 0.24951 | 18.7 | 4.01 +Output | 0.00014112 | 0.0021948 | 0.0083534 | 7.6 | 0.05 +Modify | 0.010689 | 0.016268 | 0.018145 | 2.5 | 0.39 +Other | | 0.001203 | | | 0.03 + +Nlocal: 8 ave 10 max 6 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Nghost: 1259 ave 1261 max 1257 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +FullNghs: 2891 ave 3614 
max 2167 min +Histogram: 2 0 0 0 0 0 0 0 0 2 + +Total # of neighbors = 11564 +Ave neighs/atom = 361.375 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:04 diff --git a/examples/PACKAGES/metatensor/logg b/examples/PACKAGES/metatensor/logg deleted file mode 100644 index d66c31e6a13..00000000000 --- a/examples/PACKAGES/metatensor/logg +++ /dev/null @@ -1,1415 +0,0 @@ - - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 11.588ms - creating samples Labels (287466 pairs) ... took 26.1818ms - moving neighbor data to dtype/device ... took 37.6156ms - creating neighbors TensorBlock ... took 7.37964ms - converting neighbors without ghosts remapping took 82.9443ms - creating System from LAMMPS data took 129.204ms - running Model::forward ... took 346.452ms - running Model::backward ... took 45.4164ms - storing model output in LAMMPS data structures ... took 0.061583ms -PairMetatensor::compute took 534.32ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.74904ms - creating samples Labels (287466 pairs) ... took 36.0691ms - moving neighbor data to dtype/device ... took 45.8921ms - creating neighbors TensorBlock ... took 0.119523ms - converting neighbors without ghosts remapping took 85.9571ms - creating System from LAMMPS data took 89.8463ms - running Model::forward ... took 30.4214ms - running Model::backward ... took 6.7433ms - storing model output in LAMMPS data structures ... took 0.058992ms -PairMetatensor::compute took 147.152ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.91786ms - creating samples Labels (287466 pairs) ... took 24.7922ms - moving neighbor data to dtype/device ... 
took 46.1817ms - creating neighbors TensorBlock ... took 0.154812ms - converting neighbors without ghosts remapping took 75.1849ms - creating System from LAMMPS data took 79.0453ms - running Model::forward ... took 83.0292ms - running Model::backward ... took 89.7461ms - storing model output in LAMMPS data structures ... took 0.06542ms -PairMetatensor::compute took 273.659ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.82377ms - creating samples Labels (287466 pairs) ... took 22.4251ms - moving neighbor data to dtype/device ... took 45.8675ms - creating neighbors TensorBlock ... took 0.126732ms - converting neighbors without ghosts remapping took 72.3638ms - creating System from LAMMPS data took 76.2302ms - running Model::forward ... took 0.791876ms - running Model::backward ... took 4.42353ms - storing model output in LAMMPS data structures ... took 0.059108ms -PairMetatensor::compute took 101.871ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.90645ms - creating samples Labels (287466 pairs) ... took 20.9111ms - moving neighbor data to dtype/device ... took 44.6434ms - creating neighbors TensorBlock ... took 0.111157ms - converting neighbors without ghosts remapping took 69.6583ms - creating System from LAMMPS data took 73.4327ms - running Model::forward ... took 0.794495ms - running Model::backward ... took 4.41927ms - storing model output in LAMMPS data structures ... took 0.059082ms -PairMetatensor::compute took 100.097ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.82388ms - creating samples Labels (287466 pairs) ... took 21.707ms - moving neighbor data to dtype/device ... 
took 45.2102ms - creating neighbors TensorBlock ... took 0.113546ms - converting neighbors without ghosts remapping took 70.9416ms - creating System from LAMMPS data took 74.8743ms - running Model::forward ... took 0.796907ms - running Model::backward ... took 5.37424ms - storing model output in LAMMPS data structures ... took 0.07182ms -PairMetatensor::compute took 101.204ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.87195ms - creating samples Labels (287466 pairs) ... took 21.1455ms - moving neighbor data to dtype/device ... took 45.696ms - creating neighbors TensorBlock ... took 0.110128ms - converting neighbors without ghosts remapping took 70.9161ms - creating System from LAMMPS data took 74.8993ms - running Model::forward ... took 0.786076ms - running Model::backward ... took 5.39018ms - storing model output in LAMMPS data structures ... took 0.058135ms -PairMetatensor::compute took 102.041ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.18867ms - creating samples Labels (287466 pairs) ... took 22.816ms - moving neighbor data to dtype/device ... took 49.7625ms - creating neighbors TensorBlock ... took 0.106828ms - converting neighbors without ghosts remapping took 76.9631ms - creating System from LAMMPS data took 80.7761ms - running Model::forward ... took 0.799495ms - running Model::backward ... took 4.40531ms - storing model output in LAMMPS data structures ... took 0.0666ms -PairMetatensor::compute took 107.403ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.17782ms - creating samples Labels (287466 pairs) ... took 22.7238ms - moving neighbor data to dtype/device ... 
took 46.4673ms - creating neighbors TensorBlock ... took 0.124666ms - converting neighbors without ghosts remapping took 73.5818ms - creating System from LAMMPS data took 77.4117ms - running Model::forward ... took 0.786817ms - running Model::backward ... took 4.40362ms - storing model output in LAMMPS data structures ... took 0.059165ms -PairMetatensor::compute took 103.758ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.30216ms - creating samples Labels (287466 pairs) ... took 24.2498ms - moving neighbor data to dtype/device ... took 48.4229ms - creating neighbors TensorBlock ... took 0.158321ms - converting neighbors without ghosts remapping took 77.2592ms - creating System from LAMMPS data took 81.0686ms - running Model::forward ... took 0.795704ms - running Model::backward ... took 5.35188ms - storing model output in LAMMPS data structures ... took 0.065461ms -PairMetatensor::compute took 107.435ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.9266ms - creating samples Labels (287466 pairs) ... took 21.134ms - moving neighbor data to dtype/device ... took 45.3357ms - creating neighbors TensorBlock ... took 0.11882ms - converting neighbors without ghosts remapping took 70.6304ms - creating System from LAMMPS data took 74.4252ms - running Model::forward ... took 0.796698ms - running Model::backward ... took 4.43267ms - storing model output in LAMMPS data structures ... took 0.05924ms -PairMetatensor::compute took 99.8232ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.77185ms - creating samples Labels (287466 pairs) ... took 21.1276ms - moving neighbor data to dtype/device ... 
took 44.7475ms - creating neighbors TensorBlock ... took 0.120111ms - converting neighbors without ghosts remapping took 69.8644ms - creating System from LAMMPS data took 73.6641ms - running Model::forward ... took 0.800139ms - running Model::backward ... took 5.3336ms - storing model output in LAMMPS data structures ... took 0.058551ms -PairMetatensor::compute took 99.9573ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.11082ms - creating samples Labels (287466 pairs) ... took 21.8182ms - moving neighbor data to dtype/device ... took 46.0812ms - creating neighbors TensorBlock ... took 0.123661ms - converting neighbors without ghosts remapping took 72.2505ms - creating System from LAMMPS data took 76.197ms - running Model::forward ... took 0.803314ms - running Model::backward ... took 4.38621ms - storing model output in LAMMPS data structures ... took 0.059434ms -PairMetatensor::compute took 102.593ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.16883ms - creating samples Labels (287466 pairs) ... took 23.0877ms - moving neighbor data to dtype/device ... took 45.8524ms - creating neighbors TensorBlock ... took 0.11302ms - converting neighbors without ghosts remapping took 73.3361ms - creating System from LAMMPS data took 77.0976ms - running Model::forward ... took 0.789521ms - running Model::backward ... took 5.35487ms - storing model output in LAMMPS data structures ... took 0.061607ms -PairMetatensor::compute took 104.648ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.1771ms - creating samples Labels (287466 pairs) ... took 23.5011ms - moving neighbor data to dtype/device ... 
took 47.459ms - creating neighbors TensorBlock ... took 0.111697ms - converting neighbors without ghosts remapping took 75.3441ms - creating System from LAMMPS data took 79.1161ms - running Model::forward ... took 0.812649ms - running Model::backward ... took 5.39704ms - storing model output in LAMMPS data structures ... took 0.058938ms -PairMetatensor::compute took 107.458ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.27067ms - creating samples Labels (287466 pairs) ... took 23.2998ms - moving neighbor data to dtype/device ... took 46.1408ms - creating neighbors TensorBlock ... took 0.158251ms - converting neighbors without ghosts remapping took 73.9736ms - creating System from LAMMPS data took 77.8013ms - running Model::forward ... took 0.791141ms - running Model::backward ... took 4.39856ms - storing model output in LAMMPS data structures ... took 0.057344ms -PairMetatensor::compute took 104.539ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.22692ms - creating samples Labels (287466 pairs) ... took 22.6665ms - moving neighbor data to dtype/device ... took 45.9093ms - creating neighbors TensorBlock ... took 0.124772ms - converting neighbors without ghosts remapping took 73.0163ms - creating System from LAMMPS data took 76.8416ms - running Model::forward ... took 0.789668ms - running Model::backward ... took 4.42614ms - storing model output in LAMMPS data structures ... took 0.058491ms -PairMetatensor::compute took 103.312ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.26549ms - creating samples Labels (287466 pairs) ... took 22.8947ms - moving neighbor data to dtype/device ... 
took 47.2927ms - creating neighbors TensorBlock ... took 0.154429ms - converting neighbors without ghosts remapping took 74.7127ms - creating System from LAMMPS data took 78.5063ms - running Model::forward ... took 0.798138ms - running Model::backward ... took 4.42496ms - storing model output in LAMMPS data structures ... took 0.058954ms -PairMetatensor::compute took 105.307ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.38523ms - creating samples Labels (287466 pairs) ... took 23.1254ms - moving neighbor data to dtype/device ... took 45.8289ms - creating neighbors TensorBlock ... took 0.112197ms - converting neighbors without ghosts remapping took 73.5391ms - creating System from LAMMPS data took 77.3355ms - running Model::forward ... took 0.793411ms - running Model::backward ... took 4.46711ms - storing model output in LAMMPS data structures ... took 0.069233ms -PairMetatensor::compute took 104.143ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.2998ms - creating samples Labels (287466 pairs) ... took 22.5969ms - moving neighbor data to dtype/device ... took 47.2391ms - creating neighbors TensorBlock ... took 0.106886ms - converting neighbors without ghosts remapping took 74.3358ms - creating System from LAMMPS data took 78.1345ms - running Model::forward ... took 0.819647ms - running Model::backward ... took 5.43234ms - storing model output in LAMMPS data structures ... took 0.058084ms -PairMetatensor::compute took 105.96ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.16364ms - creating samples Labels (287466 pairs) ... took 27.3423ms - moving neighbor data to dtype/device ... 
took 47.4395ms - creating neighbors TensorBlock ... took 0.105556ms - converting neighbors without ghosts remapping took 79.1362ms - creating System from LAMMPS data took 82.9187ms - running Model::forward ... took 0.795955ms - running Model::backward ... took 4.45544ms - storing model output in LAMMPS data structures ... took 0.057359ms -PairMetatensor::compute took 109.117ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.20311ms - creating samples Labels (287466 pairs) ... took 22.9165ms - moving neighbor data to dtype/device ... took 47.7985ms - creating neighbors TensorBlock ... took 0.109164ms - converting neighbors without ghosts remapping took 75.1157ms - creating System from LAMMPS data took 78.8708ms - running Model::forward ... took 0.837701ms - running Model::backward ... took 4.43931ms - storing model output in LAMMPS data structures ... took 0.060665ms -PairMetatensor::compute took 105.683ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.28898ms - creating samples Labels (287466 pairs) ... took 22.5997ms - moving neighbor data to dtype/device ... took 47.1497ms - creating neighbors TensorBlock ... took 0.107474ms - converting neighbors without ghosts remapping took 74.2347ms - creating System from LAMMPS data took 78.023ms - running Model::forward ... took 0.826464ms - running Model::backward ... took 5.42335ms - storing model output in LAMMPS data structures ... took 0.057351ms -PairMetatensor::compute took 105.857ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.28581ms - creating samples Labels (287466 pairs) ... took 23.2556ms - moving neighbor data to dtype/device ... 
took 46.0997ms - creating neighbors TensorBlock ... took 0.115791ms - converting neighbors without ghosts remapping took 73.8493ms - creating System from LAMMPS data took 77.6586ms - running Model::forward ... took 0.812054ms - running Model::backward ... took 4.43172ms - storing model output in LAMMPS data structures ... took 0.05824ms -PairMetatensor::compute took 104.459ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.12055ms - creating samples Labels (287466 pairs) ... took 21.2199ms - moving neighbor data to dtype/device ... took 44.8198ms - creating neighbors TensorBlock ... took 0.110947ms - converting neighbors without ghosts remapping took 70.3644ms - creating System from LAMMPS data took 74.3517ms - running Model::forward ... took 0.798389ms - running Model::backward ... took 5.44111ms - storing model output in LAMMPS data structures ... took 0.06137ms -PairMetatensor::compute took 102.684ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.16484ms - creating samples Labels (287466 pairs) ... took 23.647ms - moving neighbor data to dtype/device ... took 47.7067ms - creating neighbors TensorBlock ... took 0.110107ms - converting neighbors without ghosts remapping took 75.718ms - creating System from LAMMPS data took 79.4892ms - running Model::forward ... took 0.793688ms - running Model::backward ... took 4.43442ms - storing model output in LAMMPS data structures ... took 0.060361ms -PairMetatensor::compute took 106.69ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.18171ms - creating samples Labels (287466 pairs) ... took 26.8085ms - moving neighbor data to dtype/device ... 
took 47.423ms - creating neighbors TensorBlock ... took 0.106612ms - converting neighbors without ghosts remapping took 78.6078ms - creating System from LAMMPS data took 82.3605ms - running Model::forward ... took 0.794912ms - running Model::backward ... took 5.44181ms - storing model output in LAMMPS data structures ... took 0.060014ms -PairMetatensor::compute took 109.62ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.22628ms - creating samples Labels (287466 pairs) ... took 22.849ms - moving neighbor data to dtype/device ... took 47.0228ms - creating neighbors TensorBlock ... took 0.124639ms - converting neighbors without ghosts remapping took 74.3099ms - creating System from LAMMPS data took 78.0758ms - running Model::forward ... took 0.80199ms - running Model::backward ... took 5.4321ms - storing model output in LAMMPS data structures ... took 0.059076ms -PairMetatensor::compute took 106.135ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.18387ms - creating samples Labels (287466 pairs) ... took 22.66ms - moving neighbor data to dtype/device ... took 46.8876ms - creating neighbors TensorBlock ... took 0.11875ms - converting neighbors without ghosts remapping took 73.9685ms - creating System from LAMMPS data took 77.7805ms - running Model::forward ... took 0.797688ms - running Model::backward ... took 4.43546ms - storing model output in LAMMPS data structures ... took 0.059668ms -PairMetatensor::compute took 103.278ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.87171ms - creating samples Labels (287466 pairs) ... took 21.2273ms - moving neighbor data to dtype/device ... 
took 45.0825ms - creating neighbors TensorBlock ... took 0.115807ms - converting neighbors without ghosts remapping took 70.3943ms - creating System from LAMMPS data took 74.1551ms - running Model::forward ... took 0.808445ms - running Model::backward ... took 4.37139ms - storing model output in LAMMPS data structures ... took 0.064335ms -PairMetatensor::compute took 100.791ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.17737ms - creating samples Labels (287466 pairs) ... took 22.8726ms - moving neighbor data to dtype/device ... took 44.8031ms - creating neighbors TensorBlock ... took 0.107526ms - converting neighbors without ghosts remapping took 72.0471ms - creating System from LAMMPS data took 75.7925ms - running Model::forward ... took 0.798661ms - running Model::backward ... took 4.46603ms - storing model output in LAMMPS data structures ... took 0.059673ms -PairMetatensor::compute took 102.565ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.15153ms - creating samples Labels (287466 pairs) ... took 23.5642ms - moving neighbor data to dtype/device ... took 46.0935ms - creating neighbors TensorBlock ... took 0.112029ms - converting neighbors without ghosts remapping took 74.0067ms - creating System from LAMMPS data took 77.8098ms - running Model::forward ... took 0.792758ms - running Model::backward ... took 4.4473ms - storing model output in LAMMPS data structures ... took 0.057952ms -PairMetatensor::compute took 104.104ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.32375ms - creating samples Labels (287466 pairs) ... took 23.0384ms - moving neighbor data to dtype/device ... 
took 47.157ms - creating neighbors TensorBlock ... took 0.107811ms - converting neighbors without ghosts remapping took 74.7455ms - creating System from LAMMPS data took 78.5296ms - running Model::forward ... took 0.827014ms - running Model::backward ... took 5.48426ms - storing model output in LAMMPS data structures ... took 0.060777ms -PairMetatensor::compute took 106.374ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.19645ms - creating samples Labels (287466 pairs) ... took 22.9036ms - moving neighbor data to dtype/device ... took 47.1023ms - creating neighbors TensorBlock ... took 0.116411ms - converting neighbors without ghosts remapping took 74.4102ms - creating System from LAMMPS data took 78.1717ms - running Model::forward ... took 0.795452ms - running Model::backward ... took 5.42579ms - storing model output in LAMMPS data structures ... took 0.058078ms -PairMetatensor::compute took 105.425ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.23505ms - creating samples Labels (287466 pairs) ... took 22.9348ms - moving neighbor data to dtype/device ... took 47.0822ms - creating neighbors TensorBlock ... took 0.111727ms - converting neighbors without ghosts remapping took 74.4538ms - creating System from LAMMPS data took 78.1915ms - running Model::forward ... took 0.791693ms - running Model::backward ... took 4.43297ms - storing model output in LAMMPS data structures ... took 0.057177ms -PairMetatensor::compute took 104.943ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.25868ms - creating samples Labels (287466 pairs) ... took 23.5388ms - moving neighbor data to dtype/device ... 
took 47.6249ms - creating neighbors TensorBlock ... took 0.107923ms - converting neighbors without ghosts remapping took 75.6246ms - creating System from LAMMPS data took 79.3737ms - running Model::forward ... took 0.799768ms - running Model::backward ... took 4.45003ms - storing model output in LAMMPS data structures ... took 0.059326ms -PairMetatensor::compute took 105.848ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.05246ms - creating samples Labels (287466 pairs) ... took 22.8078ms - moving neighbor data to dtype/device ... took 45.0724ms - creating neighbors TensorBlock ... took 0.111755ms - converting neighbors without ghosts remapping took 72.1313ms - creating System from LAMMPS data took 75.8856ms - running Model::forward ... took 0.796093ms - running Model::backward ... took 4.43689ms - storing model output in LAMMPS data structures ... took 0.058598ms -PairMetatensor::compute took 102.295ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.13317ms - creating samples Labels (287475 pairs) ... took 23.3032ms - moving neighbor data to dtype/device ... took 46.696ms - creating neighbors TensorBlock ... took 0.153897ms - converting neighbors without ghosts remapping took 74.3901ms - creating System from LAMMPS data took 78.159ms - running Model::forward ... took 0.829225ms - running Model::backward ... took 5.51044ms - storing model output in LAMMPS data structures ... took 0.057366ms -PairMetatensor::compute took 106.044ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.09002ms - creating samples Labels (287482 pairs) ... took 23.5512ms - moving neighbor data to dtype/device ... 
took 48.0461ms - creating neighbors TensorBlock ... took 0.152398ms - converting neighbors without ghosts remapping took 75.9422ms - creating System from LAMMPS data took 79.7325ms - running Model::forward ... took 77.5851ms - running Model::backward ... took 83.218ms - storing model output in LAMMPS data structures ... took 0.062749ms -PairMetatensor::compute took 260.335ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.87825ms - creating samples Labels (287505 pairs) ... took 21.3125ms - moving neighbor data to dtype/device ... took 44.8691ms - creating neighbors TensorBlock ... took 0.158792ms - converting neighbors without ghosts remapping took 70.3504ms - creating System from LAMMPS data took 74.1946ms - running Model::forward ... took 176.959ms - running Model::backward ... took 85.9972ms - storing model output in LAMMPS data structures ... took 0.066223ms -PairMetatensor::compute took 358.545ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.98879ms - creating samples Labels (287516 pairs) ... took 25.7911ms - moving neighbor data to dtype/device ... took 43.0523ms - creating neighbors TensorBlock ... took 0.179211ms - converting neighbors without ghosts remapping took 73.1407ms - creating System from LAMMPS data took 76.9685ms - running Model::forward ... took 0.796504ms - running Model::backward ... took 5.46691ms - storing model output in LAMMPS data structures ... took 0.0573ms -PairMetatensor::compute took 103.437ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.85184ms - creating samples Labels (287537 pairs) ... took 25.9099ms - moving neighbor data to dtype/device ... 
took 44.8881ms - creating neighbors TensorBlock ... took 0.144284ms - converting neighbors without ghosts remapping took 74.9193ms - creating System from LAMMPS data took 78.7224ms - running Model::forward ... took 0.796368ms - running Model::backward ... took 4.41124ms - storing model output in LAMMPS data structures ... took 0.037555ms -PairMetatensor::compute took 104.07ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.94243ms - creating samples Labels (287557 pairs) ... took 27.4788ms - moving neighbor data to dtype/device ... took 43.7007ms - creating neighbors TensorBlock ... took 0.149797ms - converting neighbors without ghosts remapping took 76.4321ms - creating System from LAMMPS data took 80.292ms - running Model::forward ... took 0.786151ms - running Model::backward ... took 4.48245ms - storing model output in LAMMPS data structures ... took 0.056347ms -PairMetatensor::compute took 106.904ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.86605ms - creating samples Labels (287595 pairs) ... took 24.2911ms - moving neighbor data to dtype/device ... took 42.1352ms - creating neighbors TensorBlock ... took 0.132483ms - converting neighbors without ghosts remapping took 70.5209ms - creating System from LAMMPS data took 74.3363ms - running Model::forward ... took 0.794169ms - running Model::backward ... took 5.45675ms - storing model output in LAMMPS data structures ... took 0.056627ms -PairMetatensor::compute took 102.031ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.86863ms - creating samples Labels (287636 pairs) ... took 24.6937ms - moving neighbor data to dtype/device ... 
took 42.6833ms - creating neighbors TensorBlock ... took 0.140299ms - converting neighbors without ghosts remapping took 71.514ms - creating System from LAMMPS data took 75.3745ms - running Model::forward ... took 0.796606ms - running Model::backward ... took 5.46897ms - storing model output in LAMMPS data structures ... took 0.057387ms -PairMetatensor::compute took 103.604ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.0149ms - creating samples Labels (287687 pairs) ... took 30.3688ms - moving neighbor data to dtype/device ... took 47.8568ms - creating neighbors TensorBlock ... took 0.171662ms - converting neighbors without ghosts remapping took 82.5799ms - creating System from LAMMPS data took 86.4301ms - running Model::forward ... took 0.814056ms - running Model::backward ... took 5.48045ms - storing model output in LAMMPS data structures ... took 0.057549ms -PairMetatensor::compute took 113.786ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.93499ms - creating samples Labels (287777 pairs) ... took 25.0759ms - moving neighbor data to dtype/device ... took 42.6198ms - creating neighbors TensorBlock ... took 0.145933ms - converting neighbors without ghosts remapping took 71.8792ms - creating System from LAMMPS data took 75.706ms - running Model::forward ... took 0.793044ms - running Model::backward ... took 4.46085ms - storing model output in LAMMPS data structures ... took 0.056909ms -PairMetatensor::compute took 102.163ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.8423ms - creating samples Labels (287896 pairs) ... took 25.1011ms - moving neighbor data to dtype/device ... 
took 42.3414ms - creating neighbors TensorBlock ... took 0.137188ms - converting neighbors without ghosts remapping took 71.5242ms - creating System from LAMMPS data took 75.3401ms - running Model::forward ... took 0.785898ms - running Model::backward ... took 4.44117ms - storing model output in LAMMPS data structures ... took 0.056591ms -PairMetatensor::compute took 101.976ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.94416ms - creating samples Labels (287958 pairs) ... took 24.3388ms - moving neighbor data to dtype/device ... took 42.0271ms - creating neighbors TensorBlock ... took 0.144518ms - converting neighbors without ghosts remapping took 70.5601ms - creating System from LAMMPS data took 74.3993ms - running Model::forward ... took 0.840508ms - running Model::backward ... took 4.42589ms - storing model output in LAMMPS data structures ... took 0.058294ms -PairMetatensor::compute took 101.098ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.91144ms - creating samples Labels (288069 pairs) ... took 24.7845ms - moving neighbor data to dtype/device ... took 42.2546ms - creating neighbors TensorBlock ... took 0.152261ms - converting neighbors without ghosts remapping took 71.2304ms - creating System from LAMMPS data took 75.0196ms - running Model::forward ... took 0.797094ms - running Model::backward ... took 5.46408ms - storing model output in LAMMPS data structures ... took 0.056855ms -PairMetatensor::compute took 101.771ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.881ms - creating samples Labels (288176 pairs) ... took 25.8646ms - moving neighbor data to dtype/device ... 
took 42.9685ms - creating neighbors TensorBlock ... took 0.148208ms - converting neighbors without ghosts remapping took 72.9953ms - creating System from LAMMPS data took 76.7733ms - running Model::forward ... took 0.815417ms - running Model::backward ... took 4.56774ms - storing model output in LAMMPS data structures ... took 0.057017ms -PairMetatensor::compute took 103.805ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.10785ms - creating samples Labels (288330 pairs) ... took 26.4028ms - moving neighbor data to dtype/device ... took 47.7583ms - creating neighbors TensorBlock ... took 0.167695ms - converting neighbors without ghosts remapping took 78.5742ms - creating System from LAMMPS data took 82.2877ms - running Model::forward ... took 0.828349ms - running Model::backward ... took 5.24852ms - storing model output in LAMMPS data structures ... took 0.046771ms -PairMetatensor::compute took 108.888ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.8536ms - creating samples Labels (288474 pairs) ... took 25.3589ms - moving neighbor data to dtype/device ... took 42.7507ms - creating neighbors TensorBlock ... took 0.147038ms - converting neighbors without ghosts remapping took 72.2234ms - creating System from LAMMPS data took 76.1239ms - running Model::forward ... took 0.796708ms - running Model::backward ... took 4.29603ms - storing model output in LAMMPS data structures ... took 0.045503ms -PairMetatensor::compute took 101.346ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.86989ms - creating samples Labels (288678 pairs) ... took 25.0448ms - moving neighbor data to dtype/device ... 
took 43.4583ms - creating neighbors TensorBlock ... took 0.128371ms - converting neighbors without ghosts remapping took 72.6046ms - creating System from LAMMPS data took 76.4345ms - running Model::forward ... took 0.81739ms - running Model::backward ... took 5.28755ms - storing model output in LAMMPS data structures ... took 0.047229ms -PairMetatensor::compute took 103.19ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.94098ms - creating samples Labels (288844 pairs) ... took 24.8531ms - moving neighbor data to dtype/device ... took 42.6709ms - creating neighbors TensorBlock ... took 0.1316ms - converting neighbors without ghosts remapping took 71.6905ms - creating System from LAMMPS data took 75.4517ms - running Model::forward ... took 0.802039ms - running Model::backward ... took 5.31365ms - storing model output in LAMMPS data structures ... took 0.046767ms -PairMetatensor::compute took 102.01ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.07971ms - creating samples Labels (289027 pairs) ... took 25.4189ms - moving neighbor data to dtype/device ... took 42.4057ms - creating neighbors TensorBlock ... took 0.146186ms - converting neighbors without ghosts remapping took 72.2031ms - creating System from LAMMPS data took 75.9867ms - running Model::forward ... took 0.796918ms - running Model::backward ... took 5.29274ms - storing model output in LAMMPS data structures ... took 0.04836ms -PairMetatensor::compute took 102.418ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.16418ms - creating samples Labels (289221 pairs) ... took 24.4701ms - moving neighbor data to dtype/device ... 
took 42.9573ms - creating neighbors TensorBlock ... took 0.127983ms - converting neighbors without ghosts remapping took 71.839ms - creating System from LAMMPS data took 75.7468ms - running Model::forward ... took 0.804939ms - running Model::backward ... took 4.34084ms - storing model output in LAMMPS data structures ... took 0.054098ms -PairMetatensor::compute took 101.649ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.97463ms - creating samples Labels (289425 pairs) ... took 25.1675ms - moving neighbor data to dtype/device ... took 42.3033ms - creating neighbors TensorBlock ... took 0.130301ms - converting neighbors without ghosts remapping took 71.6757ms - creating System from LAMMPS data took 75.4759ms - running Model::forward ... took 0.801932ms - running Model::backward ... took 5.2958ms - storing model output in LAMMPS data structures ... took 0.047057ms -PairMetatensor::compute took 101.711ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.9883ms - creating samples Labels (289704 pairs) ... took 25.1155ms - moving neighbor data to dtype/device ... took 43.0204ms - creating neighbors TensorBlock ... took 0.125327ms - converting neighbors without ghosts remapping took 72.3464ms - creating System from LAMMPS data took 76.1621ms - running Model::forward ... took 0.792195ms - running Model::backward ... took 5.2859ms - storing model output in LAMMPS data structures ... took 0.050063ms -PairMetatensor::compute took 102.437ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.00214ms - creating samples Labels (289967 pairs) ... took 25.1019ms - moving neighbor data to dtype/device ... 
took 42.5831ms - creating neighbors TensorBlock ... took 0.124166ms - converting neighbors without ghosts remapping took 71.9027ms - creating System from LAMMPS data took 75.7085ms - running Model::forward ... took 0.792045ms - running Model::backward ... took 5.32339ms - storing model output in LAMMPS data structures ... took 0.048596ms -PairMetatensor::compute took 102.289ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.98781ms - creating samples Labels (290287 pairs) ... took 25.7117ms - moving neighbor data to dtype/device ... took 42.9473ms - creating neighbors TensorBlock ... took 0.118888ms - converting neighbors without ghosts remapping took 72.8545ms - creating System from LAMMPS data took 76.6491ms - running Model::forward ... took 0.79426ms - running Model::backward ... took 4.36788ms - storing model output in LAMMPS data structures ... took 0.049178ms -PairMetatensor::compute took 104.041ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.99266ms - creating samples Labels (290581 pairs) ... took 25.3113ms - moving neighbor data to dtype/device ... took 43.8251ms - creating neighbors TensorBlock ... took 0.127251ms - converting neighbors without ghosts remapping took 73.3516ms - creating System from LAMMPS data took 77.2188ms - running Model::forward ... took 0.811232ms - running Model::backward ... took 5.35707ms - storing model output in LAMMPS data structures ... took 0.048821ms -PairMetatensor::compute took 103.735ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.0835ms - creating samples Labels (290886 pairs) ... took 25.093ms - moving neighbor data to dtype/device ... 
took 42.4797ms - creating neighbors TensorBlock ... took 0.121996ms - converting neighbors without ghosts remapping took 71.8705ms - creating System from LAMMPS data took 75.6702ms - running Model::forward ... took 0.819144ms - running Model::backward ... took 5.34722ms - storing model output in LAMMPS data structures ... took 0.047607ms -PairMetatensor::compute took 102.176ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.01827ms - creating samples Labels (291241 pairs) ... took 24.6563ms - moving neighbor data to dtype/device ... took 43.0411ms - creating neighbors TensorBlock ... took 0.119021ms - converting neighbors without ghosts remapping took 71.9252ms - creating System from LAMMPS data took 75.7188ms - running Model::forward ... took 0.814068ms - running Model::backward ... took 5.34419ms - storing model output in LAMMPS data structures ... took 0.047731ms -PairMetatensor::compute took 102.234ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.97971ms - creating samples Labels (291542 pairs) ... took 26.4914ms - moving neighbor data to dtype/device ... took 42.7634ms - creating neighbors TensorBlock ... took 0.171579ms - converting neighbors without ghosts remapping took 73.564ms - creating System from LAMMPS data took 77.3481ms - running Model::forward ... took 0.813763ms - running Model::backward ... took 4.52802ms - storing model output in LAMMPS data structures ... took 0.059323ms -PairMetatensor::compute took 104.036ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.95989ms - creating samples Labels (291870 pairs) ... took 25.0878ms - moving neighbor data to dtype/device ... 
took 43.3677ms - creating neighbors TensorBlock ... took 0.13649ms - converting neighbors without ghosts remapping took 72.6612ms - creating System from LAMMPS data took 76.4539ms - running Model::forward ... took 0.813403ms - running Model::backward ... took 5.69688ms - storing model output in LAMMPS data structures ... took 0.091004ms -PairMetatensor::compute took 104.832ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.24265ms - creating samples Labels (292210 pairs) ... took 25.022ms - moving neighbor data to dtype/device ... took 43.0857ms - creating neighbors TensorBlock ... took 0.136683ms - converting neighbors without ghosts remapping took 72.593ms - creating System from LAMMPS data took 76.3843ms - running Model::forward ... took 0.793138ms - running Model::backward ... took 4.74492ms - storing model output in LAMMPS data structures ... took 0.091383ms -PairMetatensor::compute took 103.574ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.96934ms - creating samples Labels (292564 pairs) ... took 24.878ms - moving neighbor data to dtype/device ... took 43.036ms - creating neighbors TensorBlock ... took 0.134395ms - converting neighbors without ghosts remapping took 72.1177ms - creating System from LAMMPS data took 75.892ms - running Model::forward ... took 0.799989ms - running Model::backward ... took 4.77885ms - storing model output in LAMMPS data structures ... took 0.093422ms -PairMetatensor::compute took 103.114ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 3.99093ms - creating samples Labels (292906 pairs) ... took 25.5402ms - moving neighbor data to dtype/device ... 
took 43.2155ms - creating neighbors TensorBlock ... took 0.13094ms - converting neighbors without ghosts remapping took 72.9793ms - creating System from LAMMPS data took 76.7716ms - running Model::forward ... took 0.791921ms - running Model::backward ... took 5.75108ms - storing model output in LAMMPS data structures ... took 0.091747ms -PairMetatensor::compute took 104.958ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.17409ms - creating samples Labels (293304 pairs) ... took 25.5659ms - moving neighbor data to dtype/device ... took 43.0677ms - creating neighbors TensorBlock ... took 0.142192ms - converting neighbors without ghosts remapping took 73.0583ms - creating System from LAMMPS data took 76.8456ms - running Model::forward ... took 0.79383ms - running Model::backward ... took 4.73772ms - storing model output in LAMMPS data structures ... took 0.059033ms -PairMetatensor::compute took 104.285ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.47246ms - creating samples Labels (293615 pairs) ... took 26.963ms - moving neighbor data to dtype/device ... took 47.174ms - creating neighbors TensorBlock ... took 0.171036ms - converting neighbors without ghosts remapping took 78.9197ms - creating System from LAMMPS data took 82.6835ms - running Model::forward ... took 0.800391ms - running Model::backward ... took 4.52238ms - storing model output in LAMMPS data structures ... took 0.063791ms -PairMetatensor::compute took 110.162ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.29547ms - creating samples Labels (293939 pairs) ... took 26.9884ms - moving neighbor data to dtype/device ... 
took 46.889ms - creating neighbors TensorBlock ... took 0.118789ms - converting neighbors without ghosts remapping took 78.3778ms - creating System from LAMMPS data took 82.1475ms - running Model::forward ... took 0.831121ms - running Model::backward ... took 5.58461ms - storing model output in LAMMPS data structures ... took 0.058439ms -PairMetatensor::compute took 110.362ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.46633ms - creating samples Labels (294384 pairs) ... took 27.0357ms - moving neighbor data to dtype/device ... took 46.0578ms - creating neighbors TensorBlock ... took 0.118081ms - converting neighbors without ghosts remapping took 77.7843ms - creating System from LAMMPS data took 81.557ms - running Model::forward ... took 0.818014ms - running Model::backward ... took 4.5549ms - storing model output in LAMMPS data structures ... took 0.059593ms -PairMetatensor::compute took 108.802ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.32953ms - creating samples Labels (294730 pairs) ... took 27.4958ms - moving neighbor data to dtype/device ... took 46.6081ms - creating neighbors TensorBlock ... took 0.119943ms - converting neighbors without ghosts remapping took 78.664ms - creating System from LAMMPS data took 82.4838ms - running Model::forward ... took 0.822318ms - running Model::backward ... took 4.58338ms - storing model output in LAMMPS data structures ... took 0.05651ms -PairMetatensor::compute took 109.732ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.37008ms - creating samples Labels (295128 pairs) ... took 25.3038ms - moving neighbor data to dtype/device ... 
took 44.1865ms - creating neighbors TensorBlock ... took 0.12486ms - converting neighbors without ghosts remapping took 74.0953ms - creating System from LAMMPS data took 77.8753ms - running Model::forward ... took 0.81465ms - running Model::backward ... took 5.5608ms - storing model output in LAMMPS data structures ... took 0.057318ms -PairMetatensor::compute took 106.207ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.53388ms - creating samples Labels (295493 pairs) ... took 27.951ms - moving neighbor data to dtype/device ... took 46.5945ms - creating neighbors TensorBlock ... took 0.118681ms - converting neighbors without ghosts remapping took 79.3018ms - creating System from LAMMPS data took 83.0563ms - running Model::forward ... took 0.844475ms - running Model::backward ... took 4.60739ms - storing model output in LAMMPS data structures ... took 0.057325ms -PairMetatensor::compute took 110.277ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.42942ms - creating samples Labels (295884 pairs) ... took 32.9889ms - moving neighbor data to dtype/device ... took 44.0055ms - creating neighbors TensorBlock ... took 0.196886ms - converting neighbors without ghosts remapping took 81.7571ms - creating System from LAMMPS data took 85.5441ms - running Model::forward ... took 0.841437ms - running Model::backward ... took 4.58532ms - storing model output in LAMMPS data structures ... took 0.05781ms -PairMetatensor::compute took 113.075ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.14909ms - creating samples Labels (296299 pairs) ... took 28.1697ms - moving neighbor data to dtype/device ... 
took 44.354ms - creating neighbors TensorBlock ... took 0.167661ms - converting neighbors without ghosts remapping took 76.9825ms - creating System from LAMMPS data took 80.7883ms - running Model::forward ... took 0.834223ms - running Model::backward ... took 5.61081ms - storing model output in LAMMPS data structures ... took 0.056749ms -PairMetatensor::compute took 109.039ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.11184ms - creating samples Labels (296691 pairs) ... took 28.298ms - moving neighbor data to dtype/device ... took 43.5374ms - creating neighbors TensorBlock ... took 0.137286ms - converting neighbors without ghosts remapping took 76.1969ms - creating System from LAMMPS data took 79.9782ms - running Model::forward ... took 0.865365ms - running Model::backward ... took 5.57801ms - storing model output in LAMMPS data structures ... took 0.056578ms -PairMetatensor::compute took 108.461ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.13337ms - creating samples Labels (297135 pairs) ... took 26.1319ms - moving neighbor data to dtype/device ... took 46.508ms - creating neighbors TensorBlock ... took 0.14884ms - converting neighbors without ghosts remapping took 77.0658ms - creating System from LAMMPS data took 80.8101ms - running Model::forward ... took 0.831432ms - running Model::backward ... took 5.6335ms - storing model output in LAMMPS data structures ... took 0.058471ms -PairMetatensor::compute took 109.541ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.40323ms - creating samples Labels (297532 pairs) ... took 28.7591ms - moving neighbor data to dtype/device ... 
took 45.2004ms - creating neighbors TensorBlock ... took 0.146514ms - converting neighbors without ghosts remapping took 78.7184ms - creating System from LAMMPS data took 82.478ms - running Model::forward ... took 0.828291ms - running Model::backward ... took 5.62932ms - storing model output in LAMMPS data structures ... took 0.058569ms -PairMetatensor::compute took 110.93ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.47454ms - creating samples Labels (297903 pairs) ... took 25.8619ms - moving neighbor data to dtype/device ... took 44.0263ms - creating neighbors TensorBlock ... took 0.132597ms - converting neighbors without ghosts remapping took 74.5869ms - creating System from LAMMPS data took 78.3184ms - running Model::forward ... took 0.845786ms - running Model::backward ... took 5.62956ms - storing model output in LAMMPS data structures ... took 0.056014ms -PairMetatensor::compute took 106.648ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.10345ms - creating samples Labels (298352 pairs) ... took 25.8355ms - moving neighbor data to dtype/device ... took 44.6317ms - creating neighbors TensorBlock ... took 0.126878ms - converting neighbors without ghosts remapping took 74.7952ms - creating System from LAMMPS data took 78.5922ms - running Model::forward ... took 0.841141ms - running Model::backward ... took 5.62008ms - storing model output in LAMMPS data structures ... took 0.058785ms -PairMetatensor::compute took 107.218ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.34407ms - creating samples Labels (298778 pairs) ... took 25.9249ms - moving neighbor data to dtype/device ... 
took 44.0698ms - creating neighbors TensorBlock ... took 0.132797ms - converting neighbors without ghosts remapping took 74.565ms - creating System from LAMMPS data took 78.2997ms - running Model::forward ... took 0.836468ms - running Model::backward ... took 5.65783ms - storing model output in LAMMPS data structures ... took 0.06558ms -PairMetatensor::compute took 107.012ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.26671ms - creating samples Labels (299186 pairs) ... took 25.5114ms - moving neighbor data to dtype/device ... took 45.3726ms - creating neighbors TensorBlock ... took 0.146522ms - converting neighbors without ghosts remapping took 75.4216ms - creating System from LAMMPS data took 79.2028ms - running Model::forward ... took 0.831292ms - running Model::backward ... took 5.65998ms - storing model output in LAMMPS data structures ... took 0.057471ms -PairMetatensor::compute took 107.858ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.26043ms - creating samples Labels (299630 pairs) ... took 25.7133ms - moving neighbor data to dtype/device ... took 44.2985ms - creating neighbors TensorBlock ... took 0.139194ms - converting neighbors without ghosts remapping took 74.5063ms - creating System from LAMMPS data took 78.2937ms - running Model::forward ... took 0.848767ms - running Model::backward ... took 5.55194ms - storing model output in LAMMPS data structures ... took 0.070877ms -PairMetatensor::compute took 107.014ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.22782ms - creating samples Labels (300081 pairs) ... took 25.5103ms - moving neighbor data to dtype/device ... 
took 45.0771ms - creating neighbors TensorBlock ... took 0.133913ms - converting neighbors without ghosts remapping took 75.0436ms - creating System from LAMMPS data took 78.8208ms - running Model::forward ... took 0.840829ms - running Model::backward ... took 4.64547ms - storing model output in LAMMPS data structures ... took 0.058274ms -PairMetatensor::compute took 106.422ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.26752ms - creating samples Labels (300351 pairs) ... took 26.3499ms - moving neighbor data to dtype/device ... took 44.4433ms - creating neighbors TensorBlock ... took 0.129019ms - converting neighbors without ghosts remapping took 75.2921ms - creating System from LAMMPS data took 79.0846ms - running Model::forward ... took 0.836149ms - running Model::backward ... took 4.6571ms - storing model output in LAMMPS data structures ... took 0.058405ms -PairMetatensor::compute took 105.853ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.09266ms - creating samples Labels (300725 pairs) ... took 26.2519ms - moving neighbor data to dtype/device ... took 44.6107ms - creating neighbors TensorBlock ... took 0.130586ms - converting neighbors without ghosts remapping took 75.1803ms - creating System from LAMMPS data took 79.0191ms - running Model::forward ... took 0.811081ms - running Model::backward ... took 4.64008ms - storing model output in LAMMPS data structures ... took 0.060501ms -PairMetatensor::compute took 106.259ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.23228ms - creating samples Labels (301239 pairs) ... took 25.9451ms - moving neighbor data to dtype/device ... 
took 45.4106ms - creating neighbors TensorBlock ... took 0.139287ms - converting neighbors without ghosts remapping took 75.8538ms - creating System from LAMMPS data took 79.7576ms - running Model::forward ... took 0.838543ms - running Model::backward ... took 4.6372ms - storing model output in LAMMPS data structures ... took 0.059043ms -PairMetatensor::compute took 107.613ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.51078ms - creating samples Labels (301663 pairs) ... took 25.7448ms - moving neighbor data to dtype/device ... took 44.8206ms - creating neighbors TensorBlock ... took 0.130675ms - converting neighbors without ghosts remapping took 75.3028ms - creating System from LAMMPS data took 79.1174ms - running Model::forward ... took 0.827499ms - running Model::backward ... took 5.69755ms - storing model output in LAMMPS data structures ... took 0.058519ms -PairMetatensor::compute took 108.023ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.31447ms - creating samples Labels (302128 pairs) ... took 26.3658ms - moving neighbor data to dtype/device ... took 44.9515ms - creating neighbors TensorBlock ... took 0.131688ms - converting neighbors without ghosts remapping took 75.8597ms - creating System from LAMMPS data took 79.6698ms - running Model::forward ... took 0.838657ms - running Model::backward ... took 5.7ms - storing model output in LAMMPS data structures ... took 0.063636ms -PairMetatensor::compute took 108.726ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.13698ms - creating samples Labels (302435 pairs) ... took 26.2556ms - moving neighbor data to dtype/device ... 
took 44.792ms - creating neighbors TensorBlock ... took 0.138658ms - converting neighbors without ghosts remapping took 75.4175ms - creating System from LAMMPS data took 79.1884ms - running Model::forward ... took 0.811625ms - running Model::backward ... took 4.59132ms - storing model output in LAMMPS data structures ... took 0.059576ms -PairMetatensor::compute took 106.986ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.34997ms - creating samples Labels (302854 pairs) ... took 26.4064ms - moving neighbor data to dtype/device ... took 45.3941ms - creating neighbors TensorBlock ... took 0.159971ms - converting neighbors without ghosts remapping took 76.4253ms - creating System from LAMMPS data took 80.1712ms - running Model::forward ... took 0.817105ms - running Model::backward ... took 4.65088ms - storing model output in LAMMPS data structures ... took 0.056776ms -PairMetatensor::compute took 108.079ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.30485ms - creating samples Labels (303253 pairs) ... took 26.773ms - moving neighbor data to dtype/device ... took 45.9914ms - creating neighbors TensorBlock ... took 0.145932ms - converting neighbors without ghosts remapping took 77.3438ms - creating System from LAMMPS data took 81.1409ms - running Model::forward ... took 0.809335ms - running Model::backward ... took 4.63747ms - storing model output in LAMMPS data structures ... took 0.056778ms -PairMetatensor::compute took 109.009ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.28401ms - creating samples Labels (303629 pairs) ... took 25.462ms - moving neighbor data to dtype/device ... 
took 45.2866ms - creating neighbors TensorBlock ... took 0.133442ms - converting neighbors without ghosts remapping took 75.2645ms - creating System from LAMMPS data took 79.0295ms - running Model::forward ... took 0.82075ms - running Model::backward ... took 4.57621ms - storing model output in LAMMPS data structures ... took 0.058918ms -PairMetatensor::compute took 107.623ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.43525ms - creating samples Labels (304060 pairs) ... took 25.4936ms - moving neighbor data to dtype/device ... took 45.1384ms - creating neighbors TensorBlock ... took 0.139289ms - converting neighbors without ghosts remapping took 75.3064ms - creating System from LAMMPS data took 79.0662ms - running Model::forward ... took 0.814706ms - running Model::backward ... took 4.64071ms - storing model output in LAMMPS data structures ... took 0.056767ms -PairMetatensor::compute took 106.888ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.3233ms - creating samples Labels (304441 pairs) ... took 25.5078ms - moving neighbor data to dtype/device ... took 45.0776ms - creating neighbors TensorBlock ... took 0.135219ms - converting neighbors without ghosts remapping took 75.1394ms - creating System from LAMMPS data took 78.9663ms - running Model::forward ... took 0.809689ms - running Model::backward ... took 4.66938ms - storing model output in LAMMPS data structures ... took 0.057817ms -PairMetatensor::compute took 106.891ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.31935ms - creating samples Labels (304791 pairs) ... took 26.1111ms - moving neighbor data to dtype/device ... 
took 47.914ms - creating neighbors TensorBlock ... took 0.130429ms - converting neighbors without ghosts remapping took 78.5677ms - creating System from LAMMPS data took 82.3575ms - running Model::forward ... took 0.812575ms - running Model::backward ... took 5.70302ms - storing model output in LAMMPS data structures ... took 0.058493ms -PairMetatensor::compute took 111.347ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.42223ms - creating samples Labels (305069 pairs) ... took 25.5796ms - moving neighbor data to dtype/device ... took 46.0771ms - creating neighbors TensorBlock ... took 0.140139ms - converting neighbors without ghosts remapping took 76.3158ms - creating System from LAMMPS data took 80.0588ms - running Model::forward ... took 0.812627ms - running Model::backward ... took 5.72019ms - storing model output in LAMMPS data structures ... took 0.057552ms -PairMetatensor::compute took 109.303ms - -PairMetatensor::compute ... - creating System from LAMMPS data ... - converting neighbors without ghosts remapping ... - filtering LAMMPS neighbor list ... took 4.32084ms - creating samples Labels (305457 pairs) ... took 26.8623ms - moving neighbor data to dtype/device ... took 46.7501ms - creating neighbors TensorBlock ... took 0.131824ms - converting neighbors without ghosts remapping took 78.1689ms - creating System from LAMMPS data took 81.9768ms - running Model::forward ... took 0.818184ms - running Model::backward ... took 4.7001ms - storing model output in LAMMPS data structures ... 
took 0.087176ms -PairMetatensor::compute took 110.126ms \ No newline at end of file From 685b8d60b124dd7b40b96854d72ec8a4635e452e Mon Sep 17 00:00:00 2001 From: Filippo Bigi <98903385+frostedoyster@users.noreply.github.com> Date: Wed, 30 Oct 2024 17:58:01 +0100 Subject: [PATCH 09/15] Update src/KOKKOS/metatensor_system_kokkos.cpp Co-authored-by: Guillaume Fraux --- src/KOKKOS/metatensor_system_kokkos.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index bbe7bfe98fd..3da8f44b9c2 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -56,9 +56,9 @@ MetatensorSystemAdaptorKokkos::MetatensorSystemAdaptorKokkos(LAMMPS request->set_id(0); request->set_cutoff(options_.interaction_range); // set whether the kokkos NL should be calculated on host or device - request->set_kokkos_host(std::is_same_v && - !std::is_same_v); - request->set_kokkos_device(std::is_same_v); + request->set_kokkos_host(std::is_same_v && + !std::is_same_v); + request->set_kokkos_device(std::is_same_v); } template From 28b741cc74dd05d53d69b7bc2fb6811f41c56db2 Mon Sep 17 00:00:00 2001 From: Filippo Bigi <98903385+frostedoyster@users.noreply.github.com> Date: Wed, 30 Oct 2024 18:01:14 +0100 Subject: [PATCH 10/15] Update src/KOKKOS/metatensor_system_kokkos.cpp Co-authored-by: Guillaume Fraux --- src/KOKKOS/metatensor_system_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index 3da8f44b9c2..417184e40b9 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -174,7 +174,7 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metatensor auto max_number_of_neighbors = list_kk->maxneighs; // mask neighbors_kk with NEIGHMASK. 
Torch doesn't have this functionality, we do it in Kokkos - Kokkos::View neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors); + Kokkos::View neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors); Kokkos::parallel_for("mask_neigh", total_n_atoms*max_number_of_neighbors, KOKKOS_LAMBDA(int i) { auto local_i = i / max_number_of_neighbors; auto local_j = i % max_number_of_neighbors; From 3407b81626b5e34e83151f305187651876519f7f Mon Sep 17 00:00:00 2001 From: Filippo Bigi <98903385+frostedoyster@users.noreply.github.com> Date: Wed, 30 Oct 2024 18:03:37 +0100 Subject: [PATCH 11/15] Update src/KOKKOS/metatensor_system_kokkos.cpp Co-authored-by: Guillaume Fraux --- src/KOKKOS/metatensor_system_kokkos.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index 417184e40b9..8eccab9c71c 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -325,7 +325,7 @@ void MetatensorSystemAdaptorKokkos::setup_neighbors_remap(metatensor template void MetatensorSystemAdaptorKokkos::setup_neighbors_no_remap(metatensor_torch::System& system) { - throw std::runtime_error("The metatensor/kk requires remap_pairs to be true"); + throw std::runtime_error("the kokkos version of metatensor requires remap_pairs to be true"); } From b6568d8c7e1be1461697374253c589cbf3442423 Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Wed, 30 Oct 2024 18:13:30 +0100 Subject: [PATCH 12/15] Undo unintended changes --- cmake/Modules/Packages/ML-METATENSOR.cmake | 16 ++++++++-------- examples/PACKAGES/metatensor/in.metatensor | 12 ++++++------ examples/PACKAGES/metatensor/nickel-lj.pt | Bin 37732 -> 30459 bytes 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/cmake/Modules/Packages/ML-METATENSOR.cmake b/cmake/Modules/Packages/ML-METATENSOR.cmake index 92e050614b8..294d30af26e 100644 --- 
a/cmake/Modules/Packages/ML-METATENSOR.cmake +++ b/cmake/Modules/Packages/ML-METATENSOR.cmake @@ -4,14 +4,14 @@ if(CMAKE_CXX_STANDARD LESS 17) be set to at least C++17") endif() -# if (BUILD_OMP AND APPLE) -# message(FATAL_ERROR -# "Can not enable both BUILD_OMP and PGK_ML-METATENSOR on Apple systems, " -# "since this results in two different versions of libiomp5.dylib (one " -# "from the system and one from Torch) being linked to the final " -# "executable, which then segfaults" -# ) -# endif() +if (BUILD_OMP AND APPLE) + message(FATAL_ERROR + "Can not enable both BUILD_OMP and PGK_ML-METATENSOR on Apple systems, " + "since this results in two different versions of libiomp5.dylib (one " + "from the system and one from Torch) being linked to the final " + "executable, which then segfaults" + ) +endif() # Bring the `torch` target in scope to allow evaluation # of cmake generator expression from `metatensor_torch` diff --git a/examples/PACKAGES/metatensor/in.metatensor b/examples/PACKAGES/metatensor/in.metatensor index 9b93563a5c9..59a32c89e4a 100644 --- a/examples/PACKAGES/metatensor/in.metatensor +++ b/examples/PACKAGES/metatensor/in.metatensor @@ -3,7 +3,7 @@ boundary p p p atom_style atomic lattice fcc 3.6 -region box block 0 8 0 8 0 8 +region box block 0 2 0 2 0 2 create_box 1 box create_atoms 1 box @@ -12,16 +12,16 @@ mass Ni 58.693 velocity all create 123 42 -pair_style metatensor nickel-lj.pt device cuda remap_pairs off +pair_style metatensor nickel-lj.pt # pair_style metatensor nickel-lj-extensions.pt extensions collected-extensions/ pair_coeff * * 28 timestep 0.001 -fix 1 all nve +fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0 -thermo 100 -thermo_style custom step temp pe etotal press vol cpu +thermo 10 +thermo_style custom step temp pe etotal press vol # dump 1 all atom 10 dump.metatensor -run 1000 +run 100 diff --git a/examples/PACKAGES/metatensor/nickel-lj.pt b/examples/PACKAGES/metatensor/nickel-lj.pt index 
23f5c393b1a8d8899edc1394945a838338eed94b..7128011161a30f38562c620ece460d09482b42d6 100644 GIT binary patch delta 23652 zcmYiNb8sch^EM8j*x1;%ZQItyHcsrE*tV06ZEkGaw!QHt+5GO$^L_t#r)p|yx_Wwg zYO1EEyRSYj1OK=IOT+`=0N??RHfDAf_Kfzn%obj5E+)(l7H%fyCT=EDV$cBH|JQuU z{+nBn@sW_Rv9NJ4vT!o8a%r$~@Ue68v9U3+b8xV8v(mG$@UgIvF_4e}?M>V)om?FL z$8X}|z{SBRVdCOx<4ESOA`b&Qn_r(T0{{U2*HuzBz;Gdm0rW}r_$h(WP!LjL)F}rh z#7Qb}Nr5#GWB{fV=@|0=sC55FMNAO^hlFGOpO}x%45Ji9a54x&080ua6bTr^|FIw= z{ihHU%l}yV%67E=$AXIn;Q#+SWCx8$DaHgOpa;6}~!TzTi>;K6>2hHGZIc`lp z>=-rP%gY2}(nMp>LeS33|5c$VhjEz4meiTYaA^J|6IOC)2KoLvd+p+B<(*{l0dNSE zVe`CY=jUgis!`ZV_n+))nW}Dc{I|RkQFIf1t==;y4(al5O}I{MD)uqeKJqBzCt zJJ-hl@bu#5X(f1>-BJ;K3CiDT*!s=%qr&=qr;_2_A=}?yAx98YaaG^&Pu}39pdg-i zkmJ2web?hXU)$UH8>q+K4JoS&_<-Hho9+5NLBwMec_$g3e0FnKXkqlZNKW=C9B-hi zU7Pqr^u;Cvyb_@Tc1HM_CNjTt?5T|=rfz%-JMj%c6kTm!y!TGT2vn%1VepNLXs^oY z=Jigz8>EOerk=tqbgBu}_QAhi^My(D`S%HzHE>d>&&<<3B;9?pyE~_BH717+g*j9t zhIh|bB(w1aPT$3v`E{TVa!|>1Z4oYG>w4(5MW8ft`9pbeg6PjR{oKA&B3XSCHLfnx z#}M&`QB2KNo57L6An3UH?QbfPCwDN*E2*uKF#{7NZPihw9c9>wJnizzYxjkh)6ak^ zcuBgBR1N=26#%8ucR589IN4e-Ha{1t0WKt?E@tmyy1)19(OHHk4r7h)_)0-?;FzOw zo>#8+t7IB`52bI1-N3JoocjnJ+}pHQ#A0U?L5;xXEd$aSFc1y68$T_y!07|xS{lb_gB>4UOb8C;ldWCY5ay2RK#5f{@_o`6|sT>Npm$|4+2AZf$yb+l|YX!W`Fv+aC?sR zMev*hp2p@cS3O_aKNcbw(BAiE7qk!?Icvgwz8UmAb0Bz%^CyYF1|5jcY#rNRz}CR6 zjXO|IkIx>M!a1u^u%HmezC1uBmY$|eAkT+&Q%kR~H3$A-r4x1U47kXdSUNE3QBpt3 zXFf!WjI0J2zFl zQXA-P26A2~5*m{F0qr-*!CcQ3*uKLho0O&Z4Y6Rf9s z+q^-4JSFPd$3A-Ku)t>FlsI6@ zCra2%x|vVrm&R5UL|uEG2lkH{v#BtOLV_8MZ)zloGW{0PC7~ZE_IJW+MybBSt&^nqbLEiz#C`t0 zi31gQqW}1(+{ScY>W+mc+m0;R)<)Qpt_+(+XO1fl;BmxylJhwKrW;SP6~9k&x{|7e zXaQ~GO3q187-h|*-o7R}{}Rx>EH=kH*PLClox_|%ms$4}95u&gZo&O7pPxO?YFV99#Yp*i>v+6BBgkZ@C)>s=|SPP;n z>?-2ujvGqWE@@igI8-}^QW8lOF{z4-#%)*tV+FV{V-ZaV({bT4_lT?$4>l-=Cd=ld zcuxb8t;(A?Mw-f2FaF_NUBA-5Ti%5jIBA7 zWzoR3gJ^9QJp0Jdxr~NH%MU9??icWMGf?yqYzJmMl(^BiO)&E|XUKe>yTFE`-i;T} zk!oszh4rMu-p4IBN{mCHEf7?tw=hl(MTC(+xGmUk5NbwPtNw^mqR6K%2@a0c5X4X! 
z|UH`8BLEx<3VQBrW^6c3^;a?`)Y-gedcSR79_sI^vdE-anL%wKV84`6BVxzNw zfFiZzuV(%%`*#sG4_mYVBS|PqI@6ng@-JoX58CV(UX@40z9_1N4sK9W*O0Z~?Kt^K zDqOS`-nI2tIyAmSX?!5MelBnmmV*Y}6si)N^%yo~lNqa5rp$`O^x274fZ`}6a@v6j z@zT9-%n*An#PoRaf;53^m`;-Z#?;KK$eJUHck`UL1}Y*8N=d(F?LalXt#IX0@_OtE zOXCl8iyNBJ7J3=j*a9f(WaA8ZQV0h|0C|0F3OC-0EP$|^%i`hr zvYS)lLMFDc=awFNKaypQe~OIWHVL>$#x;^L>K04|zn5F|F@EE8vMX)^=?o8c9Em0G zCjx2?0r%=+PYU_z)CCv3LWlgh!WT8j6OTr`&Bc`IL807EB@eRWM1kUCDvHuA%BGKW zu=~T2EQ^$tABUiam^ldDOfBQ*uxR5WaJsUawM?)a@=dGyLw#<2%xA+~pmXDNJR0x3 zC--)OB|sQ=nd`5_h+tPA>&aPQ9t!x z3acU*bSM%g(H1Bxg6xO15BgJF#)M_e*O@Y;^`>FFCe@XKs_XDnWwv-&kKJBH5<)AZ z@uf&K1pkf9eg?%J4@o7JVO+A!IZC1+*VJh+27bL^p6L;=ZQ|jKpFkp3>N)IoKCsG) zQ_aI#VwvY^?Lm6k%YLnN2Kz2-Edz%opcd>G=xkiJBm%@j(iBrpp#zM(o|u#cZEz#9 z1*cTPuG(AEVM6KemA@3bG;$+xaEw|9hw3UGV|Rfi`U|&Ha?wmEphL9?xbk3nVHQfd z%&n|NIT6E7yp7W9dzVli62j*rzH{HV_lL0b z;p+1R754g%@Sos-(+21xVJ*9Yg!YGjkNj&2WC;4uU&E6d$yunVw4|0I!j>5Wr3vtrSAt0PO>Mx$0`0r@wJvy@wa3io0@2ca` zM10qPPd8^od1?Q{U%9A*`~F7SM!W80*Xt0U5%1Pc0yy)7CHEw!FUcy|7Vgav?=f71%gJTn4dXt=TmSJucJ4P?>^1cQCvT`W zg1Iu1|C8|)M-384mlY{h&3>h7>XN45%$7GzunA$Vc^_y#sawa92XPw7!2UH4dE!rJ zjnNz4Q>`7O-9p$ph_G@Z`m?>_&e7=1k;|Cqcbg<|l57?jn2C;`UiS~-f9^?$ogh6X zA^`Bs@;|pCr4KI%l%@;kMKF@q=2+>D!(!~$ej-k;mu~NfV19%Eabjh5R2+x;$GVI% zPB42xA4`mtHG@8P0Or-k(X1cZwEzw-bo)Ko9!+e#vCHU>h3sJX4n+<#Mb~%r?Zrhl zF0Y~gi}k;M)rLJj&zCE{E1N%U^Vlq>GauJ#iYc$UxD7h~fr4-4SL?fg&y%;$^67QD zNjT^YA_=moI%7u}Q3VYY7*Bop)1#?rdDd4PDXMDHglyvC;;z_O*>{|B z#3%Om3kkVkbWqrF5OQSs^JMwVY4Q~|D+jz72%?^|9u$07Qj}WpqHL=N(<~`$Yi5Kd zA2ZJ7tgeAX{iM-b7CX8uW; z4it!2Vg;WH48idaxyH{WBeBC`3$crAkv~lT`>*y<=PP8XX3|@PKj6lO*{}AjHu?QY ze=55RFf*2%|1lGv%Q=iu>r@ttbOj~x(OLD2a=p|C*cfR{lZPi z68}n40rl~k=T!dlqQbLua6M|vzIs@3aG=1eQ50I3B+gSBuwFzCkMQQYFs9I2R`~1E z^}8z{zBP7O=c+~E;wEJ12EyZapTGQ~lDWP0kK5Vk8!a`689kPhqnejNTHhQJerUfj zRsm(!-JG1#B%a>Pnds|i`P>U+T4Iin17N5m45(O1PX@6}5&w!e)|q7j*4`tymDNcr zfB4BX6Z>z6)OAp)!wE>Qtw!0>p65oTkC2^|d6B=SoB2w(J7v+CoOM<=pW1IK{V`+# z<27!Ga^}Em6w!35gzasf<8iwQdlmve-VacEUE61|2GGeuk@cIP$ zc+GpXs6Zwa)^n>gf6G3) 
zGz)Wglk(!fS+SNFbm*kxFgCLZZ98taQ*L*SX$X}~qssNzqch2p;6@jdo`BND>~iF% zyVk9kHWMl8d~O&E2tAfMR@fJ5@%;YrLV9imHm#@9!fRE8W&8zh?H@AFChd6^AVr)V z0}{73&dy9d-&N5RDhGom;du5pj?Xi6Sd6^B;uxyRj`!@^jcg|iWq1aoaB*W6yh#bo zd@!qQv#fui!-6Y(3Uev!s`#f_)r8g~o@z0i^!0J{vr9`O*8KJRhy#JMO7LYz5S>ha z&7SOA5sj4XOWT6eoW+y*@mdO;Kr@26_OwNN;8!)K^1xWWd|iPu^V)+rmrWW~5qYNE zo&=KpIpWyii&X(5BO!LP0kE!-o0yQzsb#Wmp&(29H+Faedi#hX9?!wuXgkLXxpVUg z1F6I5;JA~l`RS3&ZY3*B0{=*z4HLa)(V^?}ep+)=to(5S1aKXoDr{7to&@6ePMD{ig3~(Z`7YMe3ZU+Wc^4U0771nC0vb93OP#| znlKVb0=~_+;_Ld~SG|lt?P8B=ztdKg8XWM|tan#4L>JJe5!W3^K|JR8$&bdmn!yf9 zI-8>^w)Jh2ArKHDm2Edk6vRj#_P%-?i%VRQ$LvJ>weADKZ5Y90-2=wfdJS_*?;|AW zu+Qzz{o=itaIL%+0wZb+ZgHuXHxYsCYmVO<2t48pvfgHRNtbYg^rpMN#3dZ3 zC53KFgB!6XiMxj(L^dpj3T^C|m})qa0UFMNf$v%>@|>lmsmD)r&exx|Pzfi>u-@3M zN)yCfF$iB1KgKivWaBO>YR7%*9J>fk?(>4JIEmMMAYl-CjD(bO#6*3oAx*oRTIW?_ zcd_jc%8nNK!JK^Fra@X!k<7om8EftH2YI84*Y^O!8%#}LXjV!(NQ$OkgWTxAu^9_9 z$C4u4j2?|EZ?-$ct@6fD;cP-8MyskPE>Ceb5*9^qySB1=gn>(x6MoL3KPLL$&TY%3 z#~+;6eNEzV!Qa34sq@w!8-xEsg?ogb-Pn#Lg1Si*8+%gFQxdR`^R}5QOeI)WtzcZJ z9oXB68GgS7Pq(RCra60xXT@RaUCXlvkK)>UV1lXbyngu|}Fmq|(udiXDx z%aN7%FlnmPW3Cbivr#z3b#))5lrEBK^$fmw!*W55pZ2>vnU+%QNjWudT^!BybX~Tl zppEXW+L-v$n0KN`#xRIblpm0j6!BnZET=F8VEanqQ0?Gyg`Uyo&5!Kvavwpl!#G(_b!n`JJ8aC3Hn#2-g)e#%7d*?^;YpdT#sJ{tB?L=pn;z^ zaaB!<6vnxn`sGk1xTkCu+uiK~Qinc_cAHo+C?kL980v$q8AqXwTMcw|cXPUW7Mj{V z2WMntYIA{qElvHRN(`fMqL?wZ@tc9A&@oJ9D8sudK(NX)#coaw?UtU#bId#=%z#!i z3P(DXl^y>oT*w{!T0%(`a=9Z6P+!P`!yNj!cS|46e;iZZm>e+P5Xn5%DkOl{EzsgAeaDd?+aqoma?!@N9uJ!^sri_`2{VQb9`M zMfi*5@$R}wVnj^uo!Tzpu`d5g($%q$XuY&l?GJC1qe5|mi>plE$liqbO>ED3n!{y; zHT}J9@h5+Uo9fSiTqB`6%FS82YT@Z|ezZ4SVn}7{h2}Q1k`mLa) z;Kdi>Ez`gWTbP*WGR2v1iw7Tc)OE9_uE#*fY8%{qzZ9aY-7SXtWoFrMh)fGs|9On( zxUpbFQ@_`X)B%w^(H-ZMEGbW5%bE{2}UBiDPt%5HAUS16zCy!W01Sw!hJ;+SFXov&J1 zh0R~Ehj6>p&Bhkr?KQS7{Mch$-5#okH*SaWV60TfEqi#f?H(vn>w0lS-372mDd(^!S>;(!BU|E`(>TvakT$t)Ud%px_7C|gx>M8DN&KEKr%XjUx( z>UeDGNaud)=#+c!CLCs^ALzpptg;GN>i6bVnN$j_1{~42@N*c=!}bdl;P9L=yry;5 
zKOQBjFL%Q8A4kBQtF}HkoPYC(=hDuiSwS#j+#C{soa@f}J=Gb|V*ZOAte`b&FrZJ3 z!21as#U9|+QqUzu)vHvb54NhM%GlE1eZwQAE%n0F%268V>=xwQD`$0ekSYl>^pmFT(8GQarQi>&V&xkYrKVwsmFU`gLWM&#zqrM2Xols)}}Snw8WH zFpa%26(MUfS`rWBHRXz-Wpmuxn&p;W>*xx$$*1%572{UaV$+^}3?Z#xKno}>8o9mH zBi7fI@q;B~a5UARhI0u(B<0@5xeqr@HC;!G(Lka^Zr@G0vDo0^weFSiR zp#>`?KUFeSR6zrqL}$V=sO7U~8a4|(FH3O}G;1H=e#@M3E%V?T$F}qKMjKJ8<&yo1 zs5j~;6CPXtofzA9pvc$0HrF$7BvPpvWwI8CU7$4| z3nHJ2_Hs}%uw4GPmQ}sy0bV~lMNdDlBHsfn9}W{YoN&TQw#xCIc44{D*&)DD$Q5O6 z$#s8rl*pNs+kobR4qFIio#{d=ucuG6dB_f_7wKOQw8PM6|J+xMQ4sNa)?}kDvsq0- z^-|}CnZH=Cb1-@+6DoFeOeiX-8Zv6dG_FIZ2J>o|@=c$aD>BX$eRj5W5G`kNk+84m0Bj)V>~!&Q9Y4G_AuVZ7>_uNt)$QF0Y?k!C2>UM2H%4` zwRDc@z^w6=YfqaT?rP}^B?=dv4H6xF?vn@6m(IzR@@OZI09|8&Y| zPbHK84FkO?Yx_Ifbryf4gJozgxDaTcnSj{+dcsFZrx}7?b0NLv3(LVWH5DwVz@`fz=?&XOnspP&OtajhsPmgT;BA3G8+`LB-Z($P`{GwmAeP|5K3-uhU})0=oX)^ zo5vpHOf-z!OJUupHtsEBu@#^T^$WMxNa`Eei&ravY_2_acdYDsv;&cT%7$+rE7J6> zoDC`js1RPj7w1dyC1v$p)eOq1aClvn4J=Qi=L#LkEdeQ-RL@FL+Y~t(zY(9fv8^&A z+I8HsH?)mYgY7tD*5w79E2_cxT>e;Wj18sT@idZt{{6jmu zEbvk)Fy`S3v9I(Iyr6rgi?N zTVW=@a1ef_G&to)1R2OifVCeTTX`9!1&gmQfe_32@8M;a%4}ffaF;kc=CfDzZqUwGF+KaJ(TZp_ITktkbdLHm+9Hy!=%D{r5 z-{;qoh|JYx^Z?I(Vugb3D>uiCf-O5+kG{3J9Vc54(D0n#=(cC|h4^?k{8)CuN7(Ea zDD0qTrG@zZF_Dk3p)_2pXmgA9Vh<%6|`oC!fkuSThct_vnLq0x#}ByzzbbVNI$}KjT3GF9T%9EJZZ@T zq~FH|;!Mx4dm}P)YM8i%l(<@2zfWs{!dH|w+{pjf=k??tASvzTB)Uz{m)*C`2K*#G zFK72F|5uR7-<&=RnI+`7tpF@vU9{MD4C-aiD?@Zja|@a^9B!yh7lQaCLlf*P zM0D+|D-{muQe1QtZucxpZ|@lcR+C$Dn?5Qyn3`GI*k3R>Ov@rhT%PM{i1-3QHLMaR z?)dwL+bHKXnWZNXc5WUw3^xkv75{DOabf?;P%stYR+n%f=uYESvL2C^buzWGyUNzPjQh&|%%1UkX8>r{V|o`Xe|0SsPKoT> zUb1FoM}IGv_th&jPmQ=&QBDkk*o8>20sma{Y308S%V5)=7x5&&>qRNmNwFMhcWxLyT1wRLrHOvv>;(wrPcP{R`8^c$$H=3a0KBp>Fmk}`9zysL z0#u-U`bO2{#gzG7)DCJ;NFa{{TNp&s1q%@f!tkhLpK3kyc9!TSF~new${3+P)O^VH zGUz7Vb#wDW2qJ&r+$LvEJ(TT0LfFt1(-s>CgKY2o;=LuF-cc7z2=bL%jA9h3YlPZv zLCc1^C5bl*FG!pBqS#WF|4Tl!mod*|K0ep~aSE2NCD$Q{H3urf$v#7GiRny`yLjD4 zmfgHNLuS?1lS&~yWl1$#SCz5@D-ge!htFhC4-sI#PzWF2ND!I-v0cTAn7)YIj}7!{ 
z0qZr8+7my*rmQu7dt{>+SL+TXyd5+G>t(P}u}VSwKJVA2D0Sl+x*uZ84$<_+lm#2g zh4>6T$aJZK@%xDszJQPa_@CjLzX`0D|D9@L1Z4pC291?@u!8A^zn_y#G`|sX`mqF^ z$KMYGK@o!aAxMt-6-fJEUS%kCGwXe${4cQr<<)VFiVFZliu{K#lZSx(56G1w8i@|U z0YFYMmM2M>jl+bL5`zacb8*GNfPe%em}wrZYci`AP_g|lI!zCC-J5BC2X;`sjJgi! zKas_0CoNm>Y{v5Qc6hbg3Ij_j4WtC5 zsMAPrsIbO=d@Wlpi6$iMf9`re2K0S@yngTW7{SRrR#jEC-CU00!!%*PfRL}pZ;8(D zb~*+58=Temw2JANzzi%j!&VC>Lk>o6hJqQ^$IWF~1!k8nw9Pd7$! zsWNXCRcVu<$5y!EnaK&4nPw;&bRVk?M9PvTU?7Uhv4>NRxD{itrs+tc95f4_SzA;AJ95FO#)%>Z>tYxe@x{^z#E9u-v20Ar2NOk2MX_8ne zV`6ld_z_cFl&5{Z8Kv8(gBHuXh$c0wJ|5|@FnyxV6x)6xchVMWyev4?zw1A%23SGIA(Hc#^Hu*@26Sv6LS1PD#iYPBBUpi%9N+8CPT{6@A z<9AW~NkmK;C#S7KBd9s0z8Me0BzM(@uG(&+NSzZ)q2VXrUbzIe2vl0M>0~kjrDCI6 zXOR}u`9quPN{hYK!zLPS7mg=;Ip#AfYA*dRT*%jes>0Ia=)dEY7R-Gbmm=OYI>b^TaGY>y2bZ<$RG@rL(nx4nmJL)*)sFYJu*!8#S7#bVP?P zi_%$kWZIpf5_bDxwb|uQmQ&gF0tyzxtji)xB?-9+HPYh1qgdm!RGDCK?ze6}TZB>E z#X#j|gKktjInWx_uzn)|sWvh?cPfVRC+7j@vWH&_VzpunW3%EaLzu@{OI3(qm8LGg zqVB4+#6~dCjg{H1I3@0ivCY8EzteX#OKq;wcF_Er2bzrpc`z(JLJ+`$1Z7E2m#{8u;3P#c@l z+BxXiwm4%IjHH%qX-KjxSV}o4^a7oj1!9X1LIbG6oWEs%AfwEpe$2OU8FJLtS&hGu zP$T++@I86FE^2cb5@$SUc|#a{qf3s}WEZ3l>tG_y8<($*JZUuNb;%q)6?AE<^$XJ) zl~v_hqChayP00WlD_58@=Ka#e#SM&DZ3btX5kyjN{E8*(U8tdV<&~RXq<6~-GS!wi z7X&8Xuh*A}zpZfic@4{s6n+w`uPbK6>x~j-6kt7@lfjVUc*DA+r*0|Wk5Xt#`)Zc9T9ohqy62g{Snn1u?mEfdPNBgD!UQK0|-yKuGwDoot z!N1#t{2ixC1=+o-rmNYAHwyge4+>B`?cIkMw1BGK>Uy*@R))xSBLq9{+cAe!(Igd` zOKq61#?u{*u|vGj9`+e;c|+lS*{;J1!bkogGrVe}R^rOI=MX#jZf)9OlM5#7rBGL^ zJs{7!uLkL}U*nq<3^#%{4jjVt8*o?RSHI!w4v| zo|f^mfW@`nl~>%yZXwJ3Lt?%UtrlCB*qJ=%H8>o;QnvTZOrbkNVMrOzuBBi1WIbbQ z3iN_nTAYgEx5{Vx*wViX=S$Q)ZPukySwZXQ=Qeq($6~+COUE>qn5I;mk8BhPl`^QV z{Zx&TE^n2Fb~G*+M3KhLqDRKfTYG-$HD0?_+S64ohC1hKAnih#6+gy=|0|?RZS7#F zLziwXLL}s6P_mqOKRFZ*a{Nk-3;w+T% zaul1fX7X$A-#gpC<8b54hS{?!)N-TZL&!fZhO#C_TOjjt6u+@%)dg3!DlVmoN2`Ef zgh-Xhqk&SubN}VoLQos%NxwkXf|nPvTG(n$sGGo5h+uo2kheTdTyozA=tbX$ zNQB52R5(Bf`=siJf%{hqqYUti+>h{nk%ixb@2^68!VWe%+|U+$1Htu2z{Q92_BFDM 
zt*YUm9Ab)sONJhqP;tUd)*_1iKz*Vaxccb`AzW8*(%bLq4bO~xcm?>EwjV+y0DI3m z4lze$85XZ{`KhGla`OvcYH4e~^~}LXyCC7(ov~cNbsOJ{YBcQiYU&-mAfKxPNGK0a=5ic-ADr z`J`{C_&4=5?Z2U4!Cx6DhC}SYgrx7q1wJssI-t)0|MHAUvq8jxiuR16(y)eD78;%s zJI+)%Ldy3-@Ot7Mze|%r9#HFHU>Ubb|BsAP#$vN4}oefjg<6Rc3!#w02{e_MfUf z%g#Hs+bGlkB#^lw)te){5z2ulpchpi+I2btQUn*>)?v@1@tQ2 zQ|0eau^85u~Y=mH<`^vge|6uSxr^#+ms5LR=oKn7DM z2Zs4Q34_u@$RAwS6BxaO;aX(pMl5T5@q&4SZjeskpf@L;aGGs`0i1n)$YMUK;a_4MxsydPTdS;M4^7k_sM6vsK5FbPX2@QtCXPssk$*_iS z)1o4z;DVo)_g&! zgIgLR5Y!v#K$4G?r;^|pXG5;e+=^rF3EySs=Cx_(Nop5)^c~Dh0_<$#HqIPj;r%5| zFWCMVWIGoyYiqSXR$$UKfde@82xf=pa18EbZ{A@!X@ECm6yZ$$YnN64XGk0DGj!k8 z+k>AU?O_`*B`Z+wb8j8HUuDbl zi~RDTc-S{B94&|;A@hlZoQ?qD2^YVf{t1J3SVDKuA-nB^Sifrjqb?w(wF3!=c%Q{6 z(AN?6Lr;7n1w0-=3wSq1aJ>?q)fL(pr17cZV_gj++64%4U0EpB@? zu>~<@2Iz}g&o5c15&nEz2CEz`&Mz%9y1HDNH=hLtp!IcP#5e6F;dqRHv)ZG5F%Dgh zo10v9fcMhLi(Bo{U=iE%ct@~EbBD0&Pao@;s%ek4h9}n-8P7NXTJVhdIs3cgBucrh`#&^!(GoMbxU6Qb$@N!@&z^+u~I^$@OE>TLSn< z#b9B_nrLF$#yv)iIJnO;2gM>`N!}K~zm_ptK=-(|k;BG##j}&twtvj{b+aj&YA~D0 z@%qz~g9R3{hw)z`DbTVLi%2B^0t>6D(G9r47VJZM*M?xMsX({;Ht{2ZOVhhgW(Z%u zzrRbTouDv)9do)V9cY#hUJM=%s$8>98@DpTEAE=iJWL%dH^wN1Y4JD_bd|K`?STfU zAhb6qfwjxJM2uloDD$k6Ht5KuFVn~_HvW}X25s1`qfSANw4>~y*bzl!4sf$A5k8! 
zob|)lu=VGjZT5~-tZcXO{?y`BX1VV#kl(c5I>Or;4j9u1V>(Q}N-pXQ3JOVpCk8Qn z2FyDokE;)}4g|8N?ZF@yz+QzNR9+sgnfbww%#3g68bkj>|0dl z156iVzt<@cX6$Xbk))#Lb}Bw9fSp8)U@$3Xnsj?gLT|z`ly7Ux76h=!xG8ztj5_V) zyLCBA!gB*zE9VBuM1(k=id?l7y4o-2;o{J(1{GW~Fb77_t}78vWGYI5?)tgsLoFrY z_T0|nMu|7WJ|y*ijob0e-QV7E`3uXO(L9lDWXKKeL#8{)id21FT`(WI>qZn1{ks?a zZW{SgTUM-w{d2nXWitI%&$cx0pm{3p*vasRIn{o8l9`0&hm4$(@%b!Y&;D1J{zs_;E)r(I_O-xTEXgt4J`LJT!tIwW~IF zqymi!b@D0kt^c$8_ak5K%|Db;$<$GU1G#Q>H#u;t-kJCbtg~_;WJL+y3S^UlY5RsF z4p>5RgBsuekaNYh!L}xR4&k|95Er;7-b6w@1h%AyirC_pURk1|jrQch@POyy%`$#z zO5?~+J$C>4RAJkApTm;eVcKGerwS@D{m|FI0QeX0daM=8$_U$>kPSfWt*hcg0 zcpDzTdzmOOERr>hY62&ccZ02B1L0x+%RL}h9Q+ZWeLs_(Xl06h0C0F3mG+V)$;E8| z1=lR%12k@$&|A=#NTtY>#a3IqXMw_)K?7^TqO;FjM?vR(@)T14GMV3egh|*@#!2Id zp_-3a;YW`^3m;$)GGwdXNU%R=+cpVbOQ}+R_ot%-0Mg;!Z3nsjr3C4Xqc|G5lM=Do zaeIp2AyZYA#OW55)xCr{OQ&K9BzmC)fCTyD520lGmUJPK1UpX8nxKjUbz*f->??U@cA3^ zW*`lr{>7PR?&@4W7neBPg$A`&nHYnUtvcK#CJu-qtK}*k7qB0okBjzFJ-RKIw7XuQ zEc1*KbVLdecdNfE>FhxH_@tYB$V9TgX<(*C@mjs*93Fft z+i z7bvs5V2}c+oq}g2-#lj}>%Waop#I(H&rb7R_@3mZp5?k_x=M*tudpfkSd792b0ZL< zeXZQ@{V!VCUxUVD4GRDeQ2jp`c(REjs5BGKjbJ2e%snoC!GuugSl%&#!OpxS$-=(D z0+Mxfz09Dk(W0vnb@BhO;MICugFL9lxamXCTv2f5;vEAAZuXI1usJ&v2{UFB*-Ol% zFp2(CV&DNei6M3y-)lQd>YD7Q>(7tYf4RT;daM5S)>r+VB&%&KEZqM@$^orM?*=*8 z`K}?_-|n<{d|d|ZVxHCMr7_&wB_bq;ZJPUqJ)2mNa8yNjAP3FGE&Rh3`r_wJ)tJc+SBU{h4Vgt&9j(Lv>y>5#Q zR3y3{rS#Riu%q|QNcgN zwcDVJkN#F^(B%iCtlDI2j5m!*qZ`-DkYS=EL^MDS#44s&nv}Yu_}@7=XnQ0`i4{-Y z7XBJOv|OcQ@bA$g=iED0_R<@vv)sJ(zyQ+iN@HF)EnH z+9D6vVbF;1qC-BEcm{70q#-%-FDc$cwpxsZuUa>J`)pHu(Z?X}O#{-?h`!pFzf_#QT_V=lEpZdq-H z)-ZO|0Qr8G7B|MvT93Y3$7CRPzvx)#Dj0=+*P18@XQ$4)Q|dsM@b(v3&0TaT&1r7q z`n4kTw!;_}bPYi^BXyaEQtuZrF@zbh3p?m(l226v5uS>dx# z0U=eUDDleEvu}z)-Ln!RgUjPIwjF{)Zr$_vl;!_d##aEv(QM)37Tn!}+u{&NAh^4G z@Zbxrf1IS zFP&>^0dKhtZU|^i@T5A})e3YuE>Bul-T<->^6-5X*SJauKOH~$vdJ&po}I4aA?&y- zVmVQ8G8TX@e9VUnZhYo2ojxAM4GRK0yGDhPxtat_7k*>H2nEV&@mKm))4j+~Nhf!@ zkz27&XR%-g9Oe(JoI)nAc>1hoR~e$4NY-@md0_yXjbN 
z8U_7PD+QpYGMeU@Ip{9Ms#0(uOb$4jh~?9-SU{HWoLbko(*&ja?ljr{8HrNHoBDRS zG~2-56Gy`FwAZa)+~`%09-nFN?xed`vjsHa>b76vx{WvvC99-$OiK(_qW0uSvneHr zlT=1W?U0(=?UokCdnwR|Q{8-AD96karNc$l9!<|??2bKdhwr55+WX(GCxbu(_6heR zM0MtT>Q{J>XdV7`nnaEuXJe$>S}mw(%%^TB!d}_W77%cK9!A&PQ@6Y3=8KpSD|CB! z;yI{|8^PJvKee!+j5ZxVAD1>Enx?yr^N)L#wx=o0fDE8^l~ky8oukfF-jPj^>>!Dn zy-|_=fL|)__pM%_)ie_9iv&^!sQg+52iAqIdEVS^F=%%Du$1zlvE*~^6C$v>bk;ka z3LpyM4kYhTo#zF~i zUWxiK2mk0I{&r({;XNPv*I^WGW2TO*FOUYp0)v89Zb?PlAD#S{nWk2ymrfHgh>;n8J|2c z_FUS>Mcu26Ud#JAZ!QDmt!!4qaIz`#y+_#DF4pfB)y|qf9E5Hg+9Ea!bu9cq$ zZC1;FVvzAzQP*+c#blyoAg&j*Ljh{L6Gp5o9}ga5wDCV|EG0F(&2j@}?C~dOm0^5*Hr98=dh61_*d?lMRQfmN9V0>D zr^pD1r>Fjtg=oJw3%7&7_ImFu@-|(5{p3Z`#rHWij;@{N??M*U+tfe(48cza|0F4+;!5$F`9&JIW2KZvF+uN_xr2{MQYKf`Q?9625;~t$IiI>~kyuz{ zIc3MW1XygpjwK*(0S@el)}le=5Q@0chM7)_Tk}cEE!QusF<`0k*Nm~WVaQbYZ+YYB zL2&;WEG{`osi`NAj8_#=@m6dw#T?{Ie;xg+e}XOi8Els|`PaaK>Mh<5(UW{sPn3qI zB{mK)2PmPP=dy|Vt1Tn9(j(~x zM%nL`cHWEYU_;oB02(I{^{{f#J;^OTw2%-&P=brBUD()UyMJ08;h%Wof>}>>+HYcO z?xESj&dz0_H)+02+M*i}+tIq*>?pp>7SWT(5VL_1kq2M5TaNT+`>w8-TXPYayK+SV zeUv?@H&O#SGd4kN(5=YYpFQ-s%;)zIV~ zc&6b60nx!AFnd@hxT%bMr=3(BgRYQeno~Qck!&k?i_oqmHk@Eh9%;dId&8q9Yf}S> z4}wfxxTL9%G_La=nPesTBK_4KDi@IN^gRVjgn@cRYL-8mlsE9MNtS~1(5_>sNOV4T z60GqbO?ACnQ0Cf3|!EnfjaFtO?>S=dyV zC5M2P+%wMUkBvV*V$y`l2nX<6wK-bnp#Dm zcfkCg`u_Q=1m^W6aWI`Rw0JUuQVXYmju3iKs-^_KW zKGt**ZfaUJc2McS5w8@Eov;F{>qip z)n#ZdQTXOdJu{uw%vQXXSTrU+?-@1odIWf=9biw=jeD`EU9T9HoUoqsS2gWIxXuWuuU_@pNO-Z1+{Slsyb{XTe8S@4l&(A1qK2W0e(yUjk|X zRV!v?%WT%)wfq(emKehXN*(f}8K<#Ms75Ss9^AK|i5|o<2@x?uqGwvlZ$+9;0a5OC zZ>^MraY`RpHcZC)k`H`g`6d%YpMW2X4Znk!cpUgmPqVjBU_)`oF3(F7X3W)?KJDd} z4lg}fvB)}ys-2h1?MTNRvwf15A?pdsUpNj7h6z-!Bm}|A3@<{5pCm71ZY{R%5|(0H zD>qxFzbW)@3G3Sz2mh%V^eA)cV^YEP)F2E&f=2 z4ceyP?K2Ur3-=epy(7mb=b{!mbo8)xJ8`sdbhwq8tl8+4Q5hp^`z=s;J9U@ws;DD# zSUP#duVUc|i|6);M`jQm%%@Nyiirj3Aw($bGC}g7Hun$*2irZynLfN{--O>~!l|tJ z+=;0mODP!OK`scE5n^%f~Z{q2BRoEvQg0U?m7QV$RTe?Hl%y zq@a=a@?vIBK+WlYk&hTwlhD@ zA6|5QPa|I>K1sgEEi$rCNlaxVNR;N;-S142Yuvc9wN5^oDWi&T>GWUdCa;qY5eY(b 
zpWIESsjL8|X|x5B1ukQ&NOi1<8wesv)FL!%kWY8Id6)H|j8csx4?WTMq+!n(&YfXi z!cMRpii=F=9MgsZs$XnhRS-jX*Y;3rbLO2=!^VSt8_Z3>Q98+{nJq(n12cB9BV9P@ zZMI(>__CA)=y0*re>O?FziXf-P{eQCz}dGlaSFt?!L*ite%dxYXc7Y`m`a_>8lDLD z0VJyQ9E-L35van_%AFrQ@Ysc8k>P|MKHYpL7pM;V zeQd=;H2g-~Mg^edl$@I!&!Ja3$2Pf%rsnBIT4*EJQ=TaV-Hd7Dn7%&I>LI#<&AkNY z0V}YAg*11r)xRYuH&lJx9wDh7l?2dc1`)ub)PM2#p)-(yyc!(RDeNhl6ujQSbK~+Cbi4ox4hx6R-7K?Qo;84&6v|CrqSt^&Y{Y?%t2Av_80$lwAY{(I;Un;{vDO|AMLWd7Okpi~OBo5;}cfKbe>&?Bpw3KbCD z@eSEc)XneM?X{<0Zqyb_VTnMY-=we#b6(y}9h1@JL z;eZ-8XD)7;0A^)qNR;?yh7g2@Bw!Du&laiQf`?7MUom~Kc-aCUKLiqGBCT5*9Y!t` zS44C~s}kYyQtdL4K)QYt_K?jz-|9s+>*U(?la_TQ%=nJ*PCv0O?H2lUjKz&}S5 z(^nATA!N^W1Rq$`kR^uqoWqGB0sL@5IOG7fvX!2BD+9Z_I(CARj92jc8Pz#&qR6Us zRwykrurs#UKwcs72=`qod|52Td?opkMzFB<`75K}JY?Pv%KVk!FN|9zb(?w z&NI+Pwn=2vb757$k=2URCg#hcE;_7UK`v9mh+NQF2&&r@e+Ub2259ijx4eUv&d|z} zn^N)hSzH9%cH-ys3m*pKI@++54MiFZn;&e;hfO@(e8Skza>tT>t=`_uTfgc|a{-uX zH(IiRB|zn3a6+jhclz$-!9K=TtYA??V6>pt!>{cs*+*^||b zk;uo`+_iYJ(p7E7D{`$ik%aY-*9Yq00cNW*Y&x6*4-mL7$bHiMjL`7=lCQnX&gVwI zWG~J(dx#u2tIvBhspn!kG2a`?L7I$a$m^lJ?JFnOi1C6w%y%v#&Y$coc(XT@XMKuS z&M*B&KBED}S40ZWCXMz@DmZzYIJLUhzHfCOWmB`DR~o6&t2){lVUhGQ!|c3cWVC;m z9Lj<2-)fEz892>ug&u}6YFt_wjS^00ppX-*y9uDAHNvwzhdpHiPm5?DAW_zX;>RoQ z+}G*IZg*(!+$VWtw8I{_YzQykl zMyiswIsd-!*E`1rBcCF0!a6KV?p*XA- z(r*ZCF3O<|y%rXUjg(Y1(N}lH64#yZ_Z`Yb&8E+Kl30Ceyj-&=c$-EY#yEeexBu_ae-^55zb3%^BN6^r(JjhJkKv6KdN z*-3h00>;>lPSBYnFnO+fHLLo_9`(lBqz0czuUuk!Fgv-%~wvCG%`13#AV z+`1W)o7Hs-A`EY4LH6bpUoV7osBot~2Js}D8$odo;o&o{FYxNyId)&sZk9BRg!$r&B~`X7+KrmJjw|3*W2Yb*GRR+ zWi|b$5!9(ClrorvPOYDz#kF)Iq*LLv>{~1xyv&gr|Is20{Bw~yL(G|Gq64gTQnNRLtf!5+*E$#8b7i!iHw;|rm5$*wC4W~087_- z#EaL5HVxrN=OsY<&F-X+UqxrwjKLF%2{|QaEGN>`O|vhI@4D{Kt3UO zN4x~AfQ^id$Crac)m9!>3VKsL{->L=hVX*u8ytiaXT)i_SONRy-PS%J< zi_o}z{i?X0)wRb}VP-2H)L`z4=}m1mI;~7@NXDyD>&RV+1u5HpUi_;3jk4$El)&n| zP^jihrK?EQ#S7|sw`S`qyyyV{a388!GaSJF&g~jwd`IqR)eQ?Un}Gg&JwpAk_G9a! 
zKXIiVg8sE>mp1-yS{-KNJWaxyW4MCO+g@9>&hx& zS|?EvI7Num=r}jG=`s&K@II-%ebKlkIY~e*?!he;lGD9gC#7HNsCX!G88q#rfaPfR zIwZT~3$;7Pbb3BbPr>ow-1Sz!Scrf#Gi8+5g1!2oPq^fERt4#9w6}JQi6)n_r#mOF z6O4bol!ZKJqoP4LCoQ?pmLZgqEIIC5F{Nf3MFEd^Dxh4DphM3LrMbvNV?0P>PARAs> zU5SS2{_A4ouF&s(!b4@Vxf=WXsy45iQtG+NT25b|m}A7^_yo4S5nv%Z)Bfsgu(GCQ z=>iQkkD|_Vf!@*50?7oOna^Lor^TvB$lBHa(8xN*xF>8e-cJ<|TBv56Z~4y60j?L< z%^&rtVtN&x7$6_fAxxv7bsA3R6pNkWHLGV)fuA~1iobg5j8Pr5aN$Q=u&_$ zTG=ui)V_{z@jw~|ATM^;svGuRSWdnn^A1VIP;BSHRyqgO0|TQ3@V!qtOACZPuv-`@ zv#YfudW7LmLLwqzZyhWqHQG(<&sF(EVPhInCHdepycozT={#vXT0+C;VZS0~Kb#g( z^NiyuVyBSya0!fCmnn?P+`|c?bfl)9$APe(NtB7+8TRtl<^fdONz+A-N@y%GB!BWp zMEe3~fnEzfu&>VJvq+D#uW91O<4Z~W^lLdGoy9^WH{GGX#LC0{O8@~>%lVpl@TUSX zhleIH2Z1K>H)JI_I(Z_eD!eh5gGnHbH^T=>)WeY6 zU}R-xv|$iB__FSul8)!Qcl}ouUA*P}Ev7M$_dqJF^kZodTcIQ2UDM~bwj?g= z@lkeCh673sMcJh(*|gG$qG`Gw4QJt^D}&8}A9v6EW6#*E99xT;L}#( zph{+wL=*$H!D~3pzSF=cSex&9&aFcG+FP5*DP5pWK;{NZzU*#rt1m~L+diR-b2g3i zvv2yq&@PZqzh_p5!fz*621yQLKSCZC|9wOYdX8qM-KP#qXNx=S`Opku5aAOj8YS$OeXcip>E$*-Si(~c=nK9fBHhs@tO!lhD2~=h zIXdHaF-w#^N)mnObX}@vkxyQ1ZAB5G#x`R83S!#qOs5eRtZ-D>YS=qn5g?svx7C@Z&C@PbvW8b$)s3NH75ziD5 zIzBd1#CeWNe#&CF6b}WZh#})GJvtQ%J(xa|`f*=N z{kX5CX^8|yqY4O0h>M8{3yMjI2nmZxiV6t}3Q34Q$}~TMl0uRqLXWLylHz|RJNk@f z4i-$JCirg&W@@le5(~IKnHsE`M2ngSZcJi$RAL5zZ;~i81aZJqNyHfcl3Er>rbNvI zD%Z=pZ~?<520|LP^9ppgE(`q;`w1M{b_(f?IGLqU1|KLjB> zuwDioxWSh0kGwLtHiek}zfbch{||YO^1sN>(aHoLyWC)UaRP8$1~E7zl@<*}=>I`` zj}Rmc4}AX!_y2+atbzwFO(Ot6pIC#m(HK>+YcDlzy=8ZFw=t4D|h9H~L@|IPE) z9Z*oj|4#DQin06GpjYV(VBQQo@IwYIT7w6eE>i)G+!OpN^95R_C%80|o&Jvxa6R(> z81tk3EtFp1txPF(^8XyI)vN`An<`>2nVt_J46Smckl{D=Q< G?*9P_?HfM; delta 29809 zcmZU41x#LF@FnipR9 z>QD`3L?myyVYl=&YFKJ0#>`O<8dy0g+W!rMkO2dS%=e$kUg)zyCOQl?H3byM{}-jJ zyUo9#|8J0)*xVc)B=E%0DF2hgKgi(FOjd+DjQ{*F-&Z~o!azY0!enAWlfjZhQDkCy zQv5?C`G-hK#Ldpj&c$Nw%f-$0pEa$DL=!3$6clPEwi`Jy4iw-21N~n;*gc$GkIJHoLAZ)%xG7MRv5uOOmjfv&L_Q8@bpZr4ZWzT?) 
zWZ9LUh-~!4QTESOuo}KjapV_2f4OPVaaZYLZ0+kGxsvYW^7{foT)uBHDc?npbuy5RLz$bBGWzJ4%3bn7QN%g0QD*uQ8% zPm-^1COMJu&BzQI-~8rp&C}u6Du(gvcVS;y(=HQvvUc?WJcD1OG4rpX>X8SAK}uJN z?Z*j&q0osV*xbzkytvH^#v!UnLVZ-6yqkG)oz>X%dmDyIyJ&G?$RBSrU5kDo^4o^_ zE~sUmG7Tp0{1)-7%x$67uP_(W&TXjs1PrG8X)kmXL`Y<;_kuP8Y~1WN-QT7@fPKFe7rVF= zv_AZ}z<#LW;m-g^xO%eHh5B(9b^>*Fh7lD+wIGPp9_O9C8?9aHaC5n((#r9qUVK{J(J$@hxtKq(f40BbvLim+Re~Yw*BXAla^Py_i{%@_ zsBiD#Cea2!5|~a;mA8G)P4G=w4wrG|IJy+(>j*gnQS{D?x2Vu~YRPw)hZsmjm)`MI zGID24H6Hc^6qt&x&i|@|M!PQOzjy}T8|d`Wp;}0&DKJu? z?auF5a0bO1D9xMxU8{VdW4zPI3hZ#XX^brwfm;VMK7Ibf_0rTY` z#$}r5rN)7Y>V`$spv$f00|TS{D@!cwt$tpTs*%`XcWYR_zTf<+Cj#{~R5qb|w`A|x zPE{*VUe4KxgnC}GW_V+N&UIx`*G>NOd7<>*i%o0 z(mmIqmh8tCk6>|m@#OoGlD4AtDi5Q2PM7=s&$R)!lfJVdO?KG+vabZux*V=VosR>YGe!F?~HS9 z)1cqloY^ktW!EFhXWp&WpXwY>?aFE$p&UPszaGdRh~ElO-{KbhDWlK`-DYT4)0BA5 zHJOKT-wW6QNrs$b48EoO{SIR;Ts7@#0XFjcoN4zZ0_PQdypkW@0FHgRphBn@XAT57 zao2%SfVP9qQsgu%sPyb)?ONz_syU0d2PsaIUP0VfgSpy){%SWtO@Ck6U&F$l9(e2R z{LFNkpCJYIIv+F0#B`ru1*)_1QqDB>?xdG!47@ktu@zHgF=>L5hB@h!vCy?od*Fr= z$6MZXQw=pD11?$VF0bY!Rktu14t0SmnW0!;bY(q}>rvSi>~RSsU${@4ZYs|68%e^E zd3FzSGv{rVE2G&p=tXTA*cJh~*k2QlR2bFYv{)cC&BLv&!An<&3#33C`sVr*h#*GlM{ z`EIc(kDSl*j;r{;coct6NWlt7Hk9RK(GaxlvaVT_wJ2sPnd1wzB4QMvl-a}a*zH1_ zu5qZ}mpkdOHOol;E~TYdA%p{xVsG!SGsYL69AHSfZly<4s3jvR)VLFg#*Ue-ij2H$ z98lN?3l}HwUmAKU@QXLTA&v1Kqo>eM_vMWEd>!6HZJffj4>bR=Ui0GzAyj(}vh{Ka zLY^SaaL$C`b(*PrgquLn+!q;FziH~I_yfG}S}u>tw>Z(vwy8%ri(XPddb$8r{1f$x z_hh+=?_|cAwcrjN zZ=~RcPVjpey59Csh$;v0C&kIhq($ST?C5V49>wz!f^BMjS4zV7E`u=piIF2T5a*}$ zZCo`)=jV+Tp%xJ_j~$0JOWK>4@A7X7p>MA}nc4IZY(77ub#`Yb+- z&HhB1Cy<42W_=(7raHH9`=dopbevzVn=In0|2`3z10e(6b@o44roCs8@nFU(u0cuY z{AmNy8`p>qdD_3aCqnD?s6+(NDT30xF}g8o=&@VuT`h}3d_K)l{msB-FQ;;`_>wA8 zs?il-Buwk;Y^jIIz!%Aq?9}D0FriSyjvz$AAXy|<#g?22h~=(HiGR^gW;*>4K{)#2 zNJt`}U;c2f+-}q$Thu0Q1jUT@*U|lmbwZn*6%8)*rvbIe+vtxeP&#}I3XRId53XO> z8Y!2xepBoS3sbJg!ujQ<;sus>8}t|oU406&T|^Ft;DKSpc&x}q+}AZhLd&5(Sgha* 
z)B70LN@4$aU@n}CK3vm9$)?D&ln7QMiB7}D)55LGIkC77HfB%})^Eil6*khya4jwmm_=G-nQmd6*`d`wyU7fSA6)cInl()%_++|3$Hq&eb-&k)Hx_k zDlAYxN(8f?!c(RniEBp&y?%$9_|sngel`(QXO{m7AWD~Vf14$k?~+t82gZY)L?zu9 z4E(E!qMt1c^FwF&P=O|`R7KPZo{Ph33Yo5pk^rYvKrSm_2J>j@T1G{fzX$$)AvuG!G9i;#Sa>f=2zn{p?M@-lYZq98L_eqcw|vq78BVW~Jz}PC_<|ZS6uR zW(D<0(5`Kn6-P6%F_~adXqdP9nwP~8@c(p4(=$?XtwxS~tBUy^PCkk8;l!_o8nI#9 z4;QXMmn@*c7bnJ(jLz)bNPD!C#`*rNp(J(#@USUs`Nshu6;|89Vq(e6v)O!j)N6iEC z0oqOa-eKxd{!`yNZot^~He-{SVyL;@uuc-?zMTePr1Hs4O3R6Vx%*j|413^5V;-r} zIen1tr(ccs0z%ub7>LOG$irzOyx>zOQYJCp<4;HQfyE)Tzf>+rPQ18uGmO!C1Dl{F zbnmtKhMuabEl5h)W1dQCiW$G(f4@W304UZ;sZh1df)Pf2QdS90952N6U$84Py&_M$b4F69{LN>2|s7gq9fY5wF2R1l7NyN98U z?l|wcM?*RHQTdvo-o3oBHLHi_{_w$GeH+%R6+Y^8D=jvn+ zIi~0t)O+c-Rb$M4mjx5yo5Kc+EV}?tC?lhCR7GQzJMlEp4cHjzi-(jOO7;h7bJwJw zW?rf`DTQQP3a$KLH0&`Gw)#YY(r-0@j>*6 z=b&Xu*5sOm?-PHRk`j!UwB~6*;8zevpHK8Cs>0>5u9th{=Th2%Wxi3qtgzhL`a||! zu_|A%cEdW$d!bz`%d#I{B>xH1o`s_uv%BIUGrVY*X^^m8wI!3c)aeJ0Hmt$ zm7!_frw9P{tI?f*G(ET*^}Ae5kH-_>-o?x0uSWxDAyRrRi=@gJ2pMGr>g8KxC!#g# z(+PLofjIbwHut+4%&zE_!bx4bI{nTSGRBMbOD?)b6MH8ftOETbfZ{^oq|9^EHncp7jLvX{8nPX zZ7WmSoQ3^mN0QfJaqCP~JH4kEs}}1 zpfHP{{92NmGp(c018=WmAzn5a^ybmJL(~Z*=>V*_$crxC`_DE3{KusE-SOA4Dw{BKI5O| zY8e|1v>hzk$?s5V6N_~any@!thNPFC z+aIJEn8- zJ-k1D>i%)!$)ezmTot#M!z10-^nmMAVw1wO)VG?M*LaeLktZfnWdHMb08Z>7-U$!> z0}m0A5;2?Q=ifAANd#R<&18l_Sk06lMrjbj&__vSn+H*OBC6x#TW#?{d+Y1En zrYb!81e$a|O3tv8tE%2mRKDA{Y-aQQD$<;OgW1ipb&=cZ(WB37qeTmiC8)TSFgqb~ zZHl%s;8p#isXjc7PIh_-(^Bm?x+!|zJV_YaZ)D`pD^iBAKaPzAesjpC1&k9Fv9_&J zIH<^;-BvNqbp*ogqzllB)D)swR=*sncn>W-k_Hn6(~jWT1!J8!*yT6eR3~1rwg+R+ zGXLTjAk`M`u-6GivvigHn=a;qa$%Xgvh+E4?+P7m*(=e$aE|USX9}+T#3QJ7**W!a zqU-@bZ*U+~qpV2XK3%BX9vBGp`BZX}_sjjgAue9%aFCk1J$TLWsbkuYVqIyHH$uzJlfxI7bU0hfCK&%eGMk(^W!6ssNOK_3*@OsaS|UW{^&Aw1%mXGnvwcHZZic*B*KL&rJO*NlL@) zeuDhvIb^somrqzE{h7odorOPjOiM4<$0o}*u7Y?=EtREzt6ZA86b~cT@<$%&BWdkL zXexu}xn~q#txh-q!&DL2Yk?K*@q$689lo#JKktGfnIKJR45Yfssna)Y>w7cDaq9W# z>sZD#Y%V6HIP*yu@E+nf!rDg`c8u^p_5CG#AN=m}QqY=PSg~vyx`>v{{mVph^84KV 
z97wNNLkqF;zC;0ysWq561ZT5mZ#<(o91 zhl^d>7SFF^t>>KON-vunbOLO?o8AclXGL*vMKI*LcWV5Y z-p{%9e_TE7Z}0Di*#!#Dehy(Axn&NW6kF+)lDnShc}Fh89-`a}KbGjQ1afl!)SlCX zfBCKWG{-G^Pup)gbCozA1k0;LT>3HRQRj3*Du^*4)u3t&5^fvBG`;-vp4mD{iZL*Bu82MO>Aq3Y^8;wDG$;)DFf? zTl(cEskP8h)R{MXU--fE&z% z(liU2<2B*cy^gIw3fN7v^v{u1P<4tUMR(w`FPKSSuWneUzz(a-@8@%OKM^cGb1{_OSDRq6Lif*nF5?G2v5q1ClL;Z}!j zv8HgQWaqrow~1ptCqQj+Oy8$KJV=ocz01dvAfhRr>&VF;7{bhH zx+^e$=U^1uP>d^%@4_qAF0@W9E&$NGq&DCS_@tT?KClJicU=Lec6Bb>f(C~%1qnsG z@)t`=)r|M`pXlz*Y$!$~jAwf!$(UqF68Jr`6vt6ToR(p4pg0*cJIRiG$5i>>k?bHO zZ1p+H9DY0#TJTTBhmC{Q_CH&dgR_(qN-{)K_jJyb`fyG}*x8IuJwoCxDFK(yWva>F z*KPlZOXpNy=mjcLRaHA7#Kf4mQM{-UEK#nxF`cCua8OO9^t6Lyq?JVI(qgPq6pSqvy=2@a1$e~I{BR{yfRle2=ZSKFe7$1(;qAeWrS`V=MO zO>Wustx3hM-i3o4Gdpp$0y>QEe)kx4!(&r9qQlzf->=@bEP4usWt%<%i_WdM8dnPm zj(tB$FJc!ais3;@!}4c3J4cp${`ljnk68Z9%6W$1$c0C@xp}27<8*~ZH<{8&v4Wp9 zKg;X#(m(BII1ZGFZu$U#)1Km11AgcY#$y;c>|NOBhbUxY`}GKuo+bOL`(d{4NFHpD zg*>QmHrbb`(KkgXo$sc%3rn{8jC&E~zcpU;+mN##OSI1Ka z`7#izX(1DT-Pm;gW-{SN@-W{3?k!;r+C9JTMk9PGBX7!LtV@}9AZGWG6*02S3R4O; zPRCVgCY;F8VBG+Ua!$FQuO+^OKIkfNW|pH?tys=@sPPa{SUnQ_rTZj+RY^#-fYBVg zmQ{m#O1jj##3&}$c%fJ6O+g}7Aw(tI<)tFvFnyZAa6J_H0&HJrh$sUaE6bV2$80 z(itcaEdYn3*QzxZ7W+0TgsVegHgV}yQDsFKi-{w@!k3BDKR$3B-KXs1ACum*MPGq{TjIp-Nz zZj>FUzy`W4yOeR;Iz2XgYSKlknX=2k0f+YpX z#gAkDmUv2iR}YlnYD~)RK+*eFZ@|}J+0~d52Z!CCCJvJK2PP zGESor#26Vv?}noRt^qrQ(x zE%9PfO;iIz8hIDI5ap(&PUWK@olBHGy%orZ-gKB3IKiFR3b>Y9*dpskGPb8F;bcCa zAI3GVf4w3};Vh!WZWOSTaVIX*Q2!bf2% zwpHQDP`{_;Tq5i2A0U-~js^i?NzQW97PV zl!E!GD9AQXJ3hESziPHDMJCgFptpv0hZpey4GP!bVSp+6O z3co329qvc~Vhcg%fdhOixlap)AEmQj=sd3kAgLfeR5LM@*8X($Xkdi^M3|&i;D_t) z2aCsG&;ag9U80A;fgVZ!OosEoOSp9|d0#jri+ud8Mt3=t9zsm^lNB^iFKA0^E-u%~ z54zGU+-~qzPwK9iL=tF!MS9Od<~N4qu7q){%{_i zL>2mg`@%0Ep>Xb`z_&-J?;sWAVR4)de3xeMya>cT{1)!rzxXda$b8LX7)*I`-a`Jg z6D~jMn!HG6`d6I&R8QG!i4Ze_9Jbj|RM2gH z^5E4=bk4;}+6c`>8n$md#M@a|@A>Kk`!@por{WL$s5P8BxxAVKFMta%0O_YJudTvK z5Tj>exzvHfiqWyievS{RMzTdAF0At)x6?c7&g|+SH#_fz@tQ?g%Rul*n`MMcJN%DB 
zuc!mea7O@~b7qh^X${e_vG5~1DCw7X7dVa_?nx3CAUXG(!X)b@^hgGi`uJtS?m&r} zK~Rz3hCb;_;^DqhMN)2N{QMF{R%wSMtS@G^>-06MP&jrTb`oA(%*x2nvBG@f+m3)YY z&4KUDX*(@8`fUyCeCI}f2uW|Qel(jg*TrbQ%6$rhlm5BaBBkNDX*#OMKIUNQ19Oi;6;(6eI~5PeT?M8x-;3|{Pd{N{_(R-8JSV;?M&c%N1YS<#%qhLI8m!9Qs zj0KyK%E6_cp8G{UX5T8rSimX(bUqJG#X~vZuPDeBkrKzF;-f?vYQ=x4K$sW9ol>XT0@2hqSkO- zf&R0GJs_@Yy!hGop{4)e*)jZ4|C$hL9w&qt^t0hXi7b?1XA>z_ozk5I#CK_U%*mct20PgH)IT(U=Q zJFP!SRTeIHb%m97NG#F;YLHB)Z8-l>>`O-;mnVFKh(`a|uWFym8toB0fAbAuY)wSI z=(u5cqVu)ysBO{cxRtScV}o{dZ{?vmK32;@a!8OlCi3Y8Ct`tIs=L{R$eAeN7YLf` zF2~t{dw_h$7jo6D5vM0Za5Gj&@pN&i_s@T4H5;O&k_yf+ z8peY#pC^7FR)Ay~g$aXKTM>0HC44VgvTr*bf+&~EJ@$=whtxItK8^pa&3Px|Zuw}S z-8UypNEhJEwgaFCuvK=$bgy2o_IjJgX~BUk$VcEHL0b*v3WRs0Sg^i+44~z+Cr)rr zI_UIV3~u1!94vR>2Is%-LHOOHq5Jdav-8jQ!@fLS6#}2YR+vs!k-wsGr9B$rFr$Wu znfhVXVF%$slJ-~By%S@YQhHtu;MDmQubfomA3u~I_yIgFUq4DvvER;Sy3g!VPOC-E zR|-C5(&KSMrIl2yp3;&Qm`Czxr|f~~-oX3XBBXB8Ti*hlL1_q~&x35_#eE1RxH$4% z0sl^ZFHC=a1j;`b+~taL!2>i~U_vMMSV4oJ1A z|9Ck!1Qa7gx+AbWyM&iM-p_stuiT8P1cO(vQEW}#8FM|~643kD{ z{^XrYE?Bu$-lG@k+%6UMfiZ<3LLNypcA#bYT;pYnSKe@{X8bi-BGom>1QO`|0?ZNu z+1J?`Qxsc&jy4+ZI<7j7)=|$9&t)HQg7b&InAx_p{ahz~Alr#vr{9*>1=20lM`O~{ zmZzVHv2JL$7Pj8V9yvV)vabg~Vlf$M(?jZ82i)T1TV_NzqNAaSN~EuZ>CO z(XZ#kv^H=@x!!mV>yKEnhb)mP#h8~s?(bI~kNNWIMlZ{~QedJFa=$A=3c7*37v{!njF+wovj{8usFFzmPoh0#>zv=Zb8oD6lu zFtjJ`C8T0g1qcU!2>iB!y$I6VVC4LG^I!rC4yXuvOU^@v&K)iU-QRiWIv7qVSO5A9 z+I*6i0O~>ang?n}^7kijpYAL3sDP%)>iRZ(g00JP9}(!6ZU`pWcwqu&qP#@&xeAvO zY3Cj0zm^v1sd`Hs2^5sNY-R~31vKHmX~7Sfzc>k@asSN;;XTQQ=;dvaY(i zPE?(AVwIg5x>7K8x*VmF5pdI*D0Ev3FflM?@0-LeJES5jjeF-Iw}0Os->-mokp~uN zTT5Nt)$c=50W%sw0PwzWc{f}JdJ`L1k~Z$fIB%`DLRrw&>smrH=MMR*p!dCCLfXlb zmEI7#3KTpq&cE1Unt`vA1L0tBjXStVz6rMz8GD&_fNONh612C?DU$jI-Ed*_mAJdu zDmQ74tNe;>#QuS*N*hg<8-rN6A?4fnBLUeQuTE%yBspFG96aHTTO3f`eOXU#Pv77bxv|a9_D;SJsrc*{C4$p!K!j9^T2GIRc5NxELKHD zuUL33pN=eTZL2Zuk8(KLjW(-Hu0^@^XD99Ha(VlZc$-z00ih_2zMTa!)zvN*q}l1{ z9IAHL>A3hJO#t0+&L-YTPCLijt3*o4Z`O)F*Tb?= 
zOq;Q5lCI&S;n+Giy1&t~QSRfol5CPz37&4;_@H8{^eRaq4sVb8m#>w2(&z4~Xwfd0V^i_nQ}t#hl)Brb02zlmA2 zj=WS|DXB0c@m12)nd0Ib;cvqF1~P*oSIqo<*#^bS^BG+lpf~D>>jD`}9u|8Ulx_rk zc(Q%Cfg0I8wBBz=osBexM@-VeI3Ig8rPX*t;!RxYoaX!8<=!WZ< zD`4x=UUmJVH<{ENC04Jx&M9rg_tj=K)(1_f=5)O-=1jHQy1#8pY(J#x%0+BfoR-%* ziA=4*+PbZTy{Bft=1U2R5Z`L10V-}F$evJ$Q`%-RX#zF4u2*@cz;s*ZF%Kak*|LrM z_|^jWtWsw{8=Lv?kUQu^Y16@mq{@-kt{H5iR>e%3VZ)ZsMPU};#3G(pIq*cQ{I+sl zEdwVis(eYqlXJa994e+f5eIL%_Ocl>24ZplZ4=?-$7tFybyK4t#fBHpMc_>%Nk5ZC z@IENr5c$K)cE)~5X{}21EGa4)Z;hIQIY$8?g9%X+x5A7JT zSeq5>TEpC>u!o2*XJ^=(D#N1pw*Bq(;*PGdA13EFRB zyX6eWhsajv&^CRy68H3CKh$;cbmKo-sPy%zj0z$6y)s`T#-RhnSn;G`(=P~xsM%Tb zvBli27-;s)YtZPzli_n&e2trr+e3svP|1X0%lW+{UIIO<1mcoXpl``=%=5E#K<~fA z%@wNqKc)-n)^2{CzJhrZW;HC#sv3r9xACi)kCasxX1^KIZ_Koa{SxmPKiIuV0$Hgu zCBJ=(>3oUq+-(m_=SJhm5aLnKFjW1C0?(5l@tO1+a?^2^&T$R0h*13oZJjev@$E}x zCQtI}r)^!uULfNVX0}6E?cAoM^|@nq={cfP+J(W85nr*K)6uk1M;h+ihSuBamoVPn z%0VjG0@H@SbSzhsr?s;g--SIXEkj-I<-n_6c$ZA|Hm(g*tkRGVwM1n7mfwkzxsqEo3vJPBEkv5%pC7YUvMzJ|#%SQ_+ zc+#FLpY&i(rG7C&tKi(1nsuTy#riuh7>l}rM7@Z`)yL2Y?)&+a*R$#{eJ9tzI8PI3>jQ?~K59Kv zP}&|oMG+>1Dk1YDJ0ym>$Lx*_$&0GlZcBRgNeKJ~V|cU0mNYOxk}AU(l()ATc$qiV zwc@dLLlPk}3w~J@#4F{rdW|CS*J@zK9mX8~5F#+~$g|uHbqNP7%QJ=b(+kXr&BP(a zje4j<*@GZ0c<%O)XQ*Lvur(fL5F{9ivStlyT^em0dvpZq32x$N0_!~(Ih=v%jVj6? 
z5fTce{t;c~fhbBH4pIp9x$-HOLKGdslt4+a&OjW(0@b0kxWgExXk&g4W`O!!El5JJ z_ahd*I|XEb)dFB>-Jwmiaq1Tc?*D)*LuL%h*;5tmF}x!^=;MetNdd7$KG}uOYle(R z$wR%7pMGec3@8&J_E*NDgue_-0A{`Yxh{=AhLC!^mp%#3|J7M}6`B_t?cIUD<{2H5 zye2MrkvXWRAtQ{3@^2`dNyI}8Y^JAvilK2cH%sa+Z2}xfH&Xmciya6zlAjmpkjkKY zthqWX_@YM)l3kBy0IfmB3>Il}url6b(1Nu<+lNQE>tL({-WWP)h$-Q)%5|>H!wee# z>L^V4F@8eo&H7t{=b4j-ghz|iN6}trurr$bnc0ejrAS+ zm2iSVwoaS+rJnIVc4~M6yomKiJPK%UX|T9(Bs#(SDY6L+Vx${?2hUN3nCF%IGn&m5 znlaRe6&?-G^fE|2@udak?(J|t1tE>VLd5b=fb`I{C}^Y`N`pR}(G*TIOh~9f1ezPt zzIZsL#-Of^zmmxcDN#-Ab=T2<;m+*bm;0*r31R)c95FgCofyUL#7U>ziSMQVn zXo}_TOAaki<{dpek#RH;YLVS6=6HKwQ0)77d_Xks%a2sNG3MPsdTZo$;8QXxIhX&@`(VSrwQtQbNz%lqC7I$0I|4M zVhf@P;uAIHNTR=-xK}&_K4o3tlOQ-~R6bz;tRQ*}5>ztKrYKl64DuHEBjaufJlz-- zA&6pnxOW)jLdQzfPPpQ@q=k1-U_iUUiSF)W3V4I)3u9a+QmWT9P@)gcXvsa$zQ`9B zGu|cNFZWOF!?oaMkdwr5kd%BdL&iIJbu5s2k5_s-SKJMXGU^ZqKc79Wk7lk%B*PIr6`nmExx)h&`fCn4e z4^{|!$FB$8N9Oms=RgSoFE-ZlL*Dw@C!TGTaqZOAU;IiAoSlnwrk{yl@j}zmI^7St zjrE3PXW4!_G2;DE5?J}8>=a#eIHU?tS^@=!xQ*EfmJ@TiFRgZvmg9fTP_5zv88zxZ zg#=^7B?}@C9wZ8f`dr1KO+t%LkZCe6<&o!ijEP_B#z3NXK0#yK;`#@WTJ^q@ z9jUKZ2uZ4@s?x~HC(@qS0w0~6xcF)wz#AzhSt3&#d~J>Iv*bb>OMoTPN>^q4dBLe( zY1gwaEZe?D+f`6&+-3A}f#kKE!1o$u4lT37u@IUXZccwYT~H5$DTB`PS2rC>Ij__l z*VE6?#0Z#tkHDxqr z55(n@XfEKooM;-C*ROPC*NU#)Ome850L;TRh9kVkieDJl%*2zlm@M=3((g6o7(tWC zb+c^}z^{%^FFF_`)PY(Y1=Z>EYKzK9LpzcmI%hF(H8evhw0uQl&k7oUeu}h!oDqxe zGq;&5Iuw#ev^!|@KcoO4Z=bf*noVWoQ2gxfevB`?jWG1rn#zUu1TUh+-{NEH@q*(l zM5S@X;1Xsg<-l?XBcaq-FlrWty52j_F~4Pa5?TBsq$nj!L}igTzxIX2tu|FpkPu`0 z^k1N}mc?84nB}rXCmq4D*?#5GRM8co=1mYYsgxfxOx59Hv_=7PJ7)_D3lm-P>AqzP z*0|xdY;EX7%;HV)hP7CWF!J+uB#inM&x!g$GVgTgjib~Ty}#KtOlo+I)h*xI9qOFZ z-i@m?I6#PvlvUWQUg&IJF;*Yl5+Od4S16}q9!srf?L24;3?w-$$@e`PL1_>GlL8-%PhQjpEv99NaDy#%X z{sla%dJf#?%SPb=@D2ISdi&B5S=U1;%EyCSL#MM9B+&FOR75ZidJi zn{jD3YzWBbwbT{iJ_mHQ>_zEZmBHxSZhpmwcVSRJxp7H=!uJF#oNkqN_KmKiqmqHw zTt@$3!4R{{j}1;LzE|EzC^zksU(IpHH=oO|&Ad;MdLN$cNM74wzY@(d#fvW1B0xK) zXQA?Z(%PMxxOiy2+(uG%ff&wKbY?gGNnQw_M?_e7_cGae?+*D*VGJ|&ti1(oP 
zNjp~vm}IRXN1=V9vACHO&AaWbGVYHcD#gsmuR0_o91n#ORdb(J<^kf*Z1?lzY-s4<$PQC#?>e@d)nQFDz;0bNiF)+<5DI$n>CgfiWxoj;h8mqMC=3%FzIkUc ziVD7Dg5*_f=m)^MBSpK;>RXyR)YPOaa}8*Tv!_7>1+H;d? z$@d3~$b)Ejv`!x4jhC|f=qU)z0PNzKg(BDNvJl1aco2LC1@=M_?mG9v!A-J{;lENu zH9*i!qR!HyM3ctQ8Asv7IreJcGC5;LmN{u!+`g~{KDq`xl82-|_LQOdL$26o_f}0` z3p4c>J1El*DQ3q)1E4T>u~|p017H#%cu*y~2Jj_gI4Ogp^UwjvGp#TUK)_#=6SFbW zQV2oVcskz`@{e8n+C2kw33?Yva+1+fw@@rh0wN361Q3ZUf_K>eh^$lR zIBy~f6jV6*{}5SmDCGa6cV__L|EXgBJr~A`?wbk(!$^PT(snJ2S%GHb7uN!Ctw zlJ%~)H4J_h_F7u(MM}4&%IZv)5<5ZvtC_b;^b&El@gKL*&(`<8>g)C^<6HF?JLa$~ zm0!=T{B-F5wf|MTG1mPKoG|#q+1|D;u#%*;YGI&CZ`B^jX2kYRZs=n`WuuS&LcQ^k z4TPt@WYqT3L$>~csw*%VmQSDnZ3B4GHWP5+osL1fa2;I z<1*JVl(q3~0?`#ai@dn15I$qt(@SPA4mur(=S(>vPE1t#h)gxa;l)_ppYd+5TZxNq znwa9%#9$2M`4kM0_j90YwFo*MOvs^jgVhWCZXV9>P5@DnP7`9)*rls!Zw0&jS0sgP z2i|1t)95VgO3f;~+sn{% z8dHzP*4I8k1)9Z-QFd5p2qF*%9f@7$P5t_uxn9oUIyPLhdsMSyn`KgFeHs}M;UG10 zExqFTFF>`Gb8m50BElKuqLak$hucl_DSlIr9f9WNWrdhId#8F~KOFaMZ3aGlNp-Dg7`zq_A9x)%z_!8>C*coGaF=UMnhQL zZh#_7U&Klwh~K8Ea+hl*hA@Fod@|cW)ne^9?8xHBpi0+MbaB_a^)1hCxQ{q_s9kUo zIQ;#CBS+o)uo?UvIQF0qZ6fFJ2=M4^pYRqwId}x9l48ivsq_dvdC?uCezX+KA%U3Y zh+mDF1&8!gLgO9udFw*Pti@63 z9c!RM;bhj|LZ?b*I2{MyFo%se-|~+4#5rJ@^1G;C0fjkOH*};W#oYK0lE?L*)qTxu zWT&JtXgm0`F=($o;{aXqPcPr2ifZk1-MGE_WH?_o-jKK~(bz>KHTc*$GZ#BcPh(xTJ(IMErPTB5UDSe|nTBx*Z z2ggk8(KTQ9kwo3rD1XOb^7VP8_Ac5eJ8!CyS&5!JfzJ*upe6dD7x%>d>;Zh)JdW1<#+w(XrgX zxTnUJotcnnJ_u2to2Ww1XoeAP!NE5XT(EagkD=8%8kESPAir@4FM-y32_?B( zlB#Gps*#Y^soGQ{@|s=j@(sVd6_zW}YBkO|DNgX{PEswDne6Vh3vL?teFfYzxFl5Y zpg%fyYdcf`*;*d?JloMyqF}6RS{iO}dHtaqcDCKRq@J~R_2DhOaUSmLHRy@A)QfP{ zcO^S?#zFNA#ZD$z#yY?4SMDbIJz#$jk9l^yijj6HiT|#t)o>#SzlPFT+iK%bJra3S z-s6k|Np8$j*^brC-(Oml-2-<0yM?1V7xkgbHdDG?BV-S04yBFZPvTD85tF{klUN!1 z8>9s$-SxjAyQrsHr(=>0k@#?hwLMR8^jU=)yKqm{t#EL7mTjdw;Pw|USod)juk2CO zm2U`3#%hjgB6|Y~#1cD$f@+tEbM;2JLfd1_Sc~gohxkW&=@Tn5<^W=@d|y1CNojS0 zGNmuM{k+Pna4*Ig8JP4PhsZ_VYt=1tFW+xPkgw?=7>^W#B-1K^odZtz2pemv4qH%; zXP{CI>Qyd&m~w7)dun~`IXx@hA`a`WQnTh6O0w%Rg8kHXHH_qfqiszs84mg);i~$J 
z&Mtnk!&b6_y^#SUSHQn4S@RRq-Eo(d?P2v=5qw=HJ;K^PAEWcG&;iK!MNuw$OTtch z8~S%NOw{6+_ocI4b|57*2gja*sXTOICqj%o2z*D{`Ey7?*{081LE!=%jTp)l(U=e+ ze*EBuj9c^9^Kh|6N~J`~O8A1xrB-%yABo4ubyL4(TOZ(Xf&iFC{0vu@{HAVkccS|x z^us1d5oIka2_;p!i#27z=@;8<`LzG*5Cexyw-= zQ`?PLzPB@HqA!Z``x&hhlfUK*MorjM|0vko!*+3dVQ++{D(B=VVJ=&T1?wSWP$MXiyfwL& zgHOXz)w`uaCR6@>aH%*9uu**rGs!XyMzAc z%iV3~KID^4>P)w0>gVXoeoh&zm{kU`{Sq0ck|aKW9ti(DV$OLpdKHH@;RF_Tq|)q# zIV{`vwDM1Q8AUdqjXH5ggtN;j!QlI~no+6jegBmM$NXwd%Im@wPf^q0HRe%4^2ax; zXd0GdC<<6Xldl#8t(-~O7oh`f zn!7$J0$1&JA{#cm{==0PSpZ_+(v;LAc1$+{Ftm}xhh^$q-8k2qm|U*S+O6yz3*6=L zMmB>U(khR_eQTR>U_%Cb8sM#_n+7{71X@IL^C$Or(L+5CNtyg+f6}04PIS}B&vx1q zk(Z<U~UZ?GrTJ3MJ8_ISnsD$jEZw4hExsofhk_dH94~02iA^ z5SA*E(jAr^O>y2Ro)K~`i*63x;-|%*)lw(ye4c>go! zyyy(6dL-s#hXZq(xI?;^m@chM-T0N#Thg(Ih4=ab8qtB1Jr|t$*0+c=dpTQPzi#@N zS(iKcrm{+MU#wE3SwF_&|5{@gTXZsqU11aqzS`v%JL$I-4737nfImZAJIZn4lII~V z4Ch~{++3#(p4TQUYRIjy*5S}P`xEIBXG{qIi!0dK+T6(=HY+^VRQluHy>{YqMTa z%ub5+O!RQfmt(H=^Zshwq+VW8v*}+O#DIrh2rVp@z3MX%rcxDD9+)AGZ5Y5g!gOeq zi`@^_&m1qE2s?&3vRTl`O(@M@akeXLbWeNz3m~v*w4{4*%$FKPIN zvSNy*7l`I3i)Ot?m4b;c4a(Q)%BmL4u#Ihv*{)YGKpJPnw_W&R4>66dce%$pdr6V% zkpujwTbb#Uvm0Q%ckmsNg=G>>S*`U}l06q+_ReiNkjj;NdtHt$EqDozloEoWAT761 zpwXaUBkIzR`y|y~SVyFYsc>jnw!BrkC1+bWIL*8NSj?DFwO zp@~D2gkh4VY|uYxUtuzADT`4pFvViyLKOHCyz85_#B;$by}hWRyxJ`n<)lyuz>Txm zv=pomRwCe15h}%pPU9APk`$&j5kM}?+pw0)jfwTI4i)+=9LVi3^^Izat=J5L1u(`q zzqyR&mxJyR@z*U!P~!=+%RP60FvasK2$nj6Hhb_c1dDMbN zBtuP0U_>zK%iAq3dyekNhIj2TMVCOT@XEHsoFpy?V*K)W4*yETyEX1)aqueUwg zQoEs`GEvS;?f$flPE32UFDgY6HA!lD{WWaSS?tMZ%vhhg?y!253%?H_FFQ*UQ#&$Y zANJ8_mNbN*hHW4D=5S-jxV!hkjqA>Jh09UT5lPrDetyXXj`yV`wn1@sFYWZUFX199S#n`-#7V4m+c+hDHfuB0oqNvVF$)d5@wn|bFSWju%u;_E)Z z93N-3gVnfKBb~%By<`5o-go=}$pSquV*M$HjFQ{BaXTH}r2XntJhKDf3SFvB7v(&y zOnEp>=Qa5lS4}Zp8Io0zjWwUe+zjzR-fDz@#Q{^HysF26@7VDRKc-_b0~~b8GAb3% zu1IQu`qavp+WwE-1D;G$AwHig;V=f!nsZW0Rv+&9R^-FW1cJ7)}?%!;MWlCf-<+VR~}xLKjN0K9W9a) z8Z6N9><1}?#b>b4vRN79H`P9@sTqX|d(>)JQ_N92S~NB)AP1BsN<^xaqm@!C;>=kF 
z_IZ5k(J|84Ee(d*<7=#*xnNu#J29VTid{b$v>0nVDqW6_W|oryFn`b|DWzuDV7e8) zee=cG_EN^Ys10?^S{p|puW}SVQ~By8j~_>qj|x|=3RyLAUjC~h9Pt0X5NJ`JsQyuoudvo! zb0<`;J^!N{du~d{0l4CEC5bAC&mFTEr^*L=^tq4}%U`B&0n4whW;vGn<8yxv*ti4= zp8dAbFXHU7{h?Zvo5aV#)Mlu-!jK8?%Mn}L^ESVSh|OkxxY*A6l5zeNAH_aN`;}0d zyQ(CZ`z=EBJME6IlfgfY>7}VRHMiV{;jPaj-bS^sj=O&R)dIY04M;vXd+qV2rZ?3g zIYo2ICRM}wqR~FJ2+EKqE@0Wa8^R6szArSYZC^Og5powuN-@e*+iHGa=(b@KLmOWj zouqaaZK3u2pkjDAI^O!~0R1boo*yk8M4&7OUw#x%zoot~!}RmlFWR;Yq&7NMJxH^a zlVzo`4i;3TdW8U*usWTEV}sV+fR;VcNG%?OBY*#0S=8!d<~_qh2v`LA1K{p zcp6kaVrvB(PFe-+Otb2Ip94OdR+oyw6Z2)Uwp2(-8D-+>4<^Tx(c4u=Zg0N|JbJC9 z$NXq`nHgiw66a|$1QFTS^ex+?6fMf>;$Jq^2oC$C@oo&jInz+2&CQu|bfPd+MNaf}_TlPl0slY~aei`DroNy9GR&FA!s9K55QZ9@~D zsXM7LsPkp@Byo6`l9Q#aSLDuf;%dgTTyAz}-hYrColy)~!#Xd!u-t*k$p1mOm&SuV zQ9R+Z=1>lJP2A6$F}YFYkP|#)(J_`?&B(e(&zWZ)Ohjx^*Y!I{WwK;-bpHDJ^UN!y z+{YvGaO~$5=9LTeROH82Vm`5^q+nA&HH@MVDt~8NWctuk)1Hn=d#@`SioxFKfmBIz zB?FgwNn`CM3xS{g4e|%R)5n1cy~ZEvN=Ij-eu~5buHhQa*B%KtiOG1~pOMXm5;wiA zXIlqQKJe@}J02A&@Dba5DtNOw=s=rEH7b<*UQ7>+ zW(IL$vKxCdT>$CD&Si$S_IGyV4}J{F~7 z{>GKW^S+tn-MkxNM*2|GD%nGYw-%5_!x~lk$yD=C{*l> zYRm~a_iQ;v;^dxIA2Nm#{BHWTp%z^6%MValx1#jc#`o=vVQOAA-Ztigl%m9G1KqZU z)LMcQK1X+d@+Ua_4ER2N6<1DRRl0RJts;C@Mt#X^;P)yZAVMkGBiOq|lTRQ`msj9w zRGUGOH(uRrjJaU>6Md%A9?4-i4F4M#it8nsnICO$TSFDDQB$jA#zx6d>b3pc>WmXF z&C3W79&$gS3(s%ab=~C3-29%KU|v}hk_6h_TL+fn4uGYsA}MSu=r6x;yG*a?#{A*= zMz+V96Z^(@Hd9d@KUD+E;f}w$pRbm)G3T2k<88fx#6P2r=T)@HFR_n&ZHzA_iTI8T zLadtjPikWeWUXI^e9jpIaf|%?lESvy{Kg>pNRRXWq#}tYXr;M(DaUhUG9DB#tpzMc zC;pHxYntEb60AkfUizR?uq8#|<^N-p+R%7VIeFOro&7_m-gRNcn#Yyld}^D2rv1D} z;G_gYu&(4nMT}KH845G6ErYdH-LEJw!G-jBxxL>$@4CYy*LT{*Flz%ScBy3AmzHW9 z4_HW;_q58Y9EV%^56?5o+GX(S3UAkYlN|W6g^SxzK^NtiPewx zpe)5j7kf&Qz87^=-sB_UHJjxLF{U(2uXu<%qs=Uq_@LX$kKRAba8i;JYg_?O*wI`L zhT1K13tp3q!Q`Dws7s6RebINQBl+0g;hEo!_l78TU~|w@beJXjTef*D?^j^9_W-)DR_^VBWS*{h1Lz9 ze6)NFqZVJJ1q($&8}PH>gkqtd1f3*3D!EhAH`=J#3M2}^CQ0I>cu#Ee&1Cl^!df8| z#A06r^2!0)~HY~DuX^1i>SEcr0SV*%Ry*+g3Gy2>TUaXf015)kr{vE 
zZtYs%4J~_a1B=Hzn9TkOo@|n_y$0__$aXaOHhQ2n4DjNU2*&wc0j)nG#cqh;HT1R% z@*A%6WQ*cYGH6P}so1vPmr!t5C$&&X1P1HR%_tx!04~U=zrC6ei{!?vXV)<>gyaxT z=#X!NjLzR0*&G$hhSWKg-ML7ij@xWc37tfQ7QZlkc*O*&W4jqLWFqjrse?l}7nS|q z^c}p;FVczCq9d=OUb*B#WF;DRFNXh{qgz#}nKb)MTw>-Rl-7}+(rJ8mZ~OM8zczLt zPcqOMy2p9i^2ynUJo|;0o_KI3^g>7k&YKRZiu*{pDedEr;4BPH4`UnMZi1As5hn(@h8_7t}o#$zmVfv+J_!nt6e$k;$Pu?@=T-&nUbX zxkC*$lx$|r&PMUahbGr1wMben3ro$r!T_65=eHFblP}O2?_0ts#$>^806Y)UQ(uo= zFdH#-)x#x9r}DwmC{>SPVDV>w(ER8WdwyubOuX?ESQp3c9gC86MR^RkA`>^bj@au= zz2zONZQmXREfTi`dc6qZO>?eUBn8*(*6K!R*Id{g9@KopqYDOHMdhy#V}eWy3pf%p zX=kAj5eyed7~5`-cPV#yMZ}zF&Nds+o>0{eJ*Ry*#u@g*tGULhe7ym)fxm3qOUqV5 zK|d4IyQ8(He6tJt$Wo2&Ulk(??}!j+y!oVX1BNw@m79BU&%a5X9NBAmvgvC1g^2Z> z0b3@E2$>YdxExxHk-W=`xwbD+2877Mnus+|_di7pHIUy-Y?|lk3>8bC5m7t2ofcCv zDiEpCt}~p86Feagd42|PhE*ekOfRFSWpIX{UYXw|!me*^6%M(%xDLHdy?)i+q>27u zjU70*B^abpSleBxcJmwn=7*$%e`eG50{WDIGD7|;F?3QqPDqofZ54<57Ql@=N4fQn zP*#({8n#a0Fu@(x!95f1A&|g!)rx;U`O$&>8E$a3q6^R7;(q#k8%VSB*dKPnvh$(K z3pRQAMtuWqCm2OE7HT{s>3Ed)yZ=WvVkg#3IabJ-HO_5nQ4$M5y$G zl0Ug~aZDB5nv{*w<_YC}5-^wjc70N2NUJLy<>?A(iCaYH24MXCk`(z~S2PgGn{%!q zEPMaMZ-P4%?~Wce+WYsS1%`9q zOQ6rW*Y>XAz<^xb6Y1uv^!;F0^3=WT$l9Y5_$eM5O#KKC#orju2ZFL+Z%u+s6tMgg z#!Mu=b+FiE=PrdeGV@>ibiM$HGUad|@50s_oiVgr@^{LPjKBbpAhgbg_sA^$SUuhK z;p45@#%S=~rzBpe5ZFZQ=F11qpC1pxWeK^0M+1U3ghj%Rw}8Er0}iNFHf zx9L4kQP3v*j&M(4i?D^Mf}G|&nB&Z!;MIK|NA3Lvq0e4160pnD`_RypKKAG+LaKG5 zy-{JY(;N`hvtD-58RIOCoQ?J-MVhy#AAh#?7TQfGN1gM^;_DUN0^nfewl zPEqkj;o>{`8=zdQbs)shA@VDXWu)8{1#_$q>87ikgjFXHYOCH;cbD$XuqwZp0=F*5 zqjkd_wyph?9@I84!tmh;rPHFk{oOV5J={kTRh+fOY$w=`R21Q!z3V-LU?-}&SNA$$ zf_LYgFzX71H+)g+Fzr#tg9F1NBAS~YVjy?P*lX6CEBk@=8UEN?D4qTpLY??vC#@Y5 zxtk0aAp{DbHS>q4$|wFiyhm}3b(nkDja5yI7=~=}t|rh)?sk`VYPMptS)_u!2{s7^q zt+up=^x_M`j_n(FcDQxqBumEmVV-K%jESR}GvJDRo3+4^bKtwO2p^R{0c^-Ym7h&E zHvWwta%bY`lQb{l&AprrAh#(>EyNaE)kl0plYIueuy?A}bsewjV-*O$$$WN0zHzsI z@hfpxhxy*H2Xf}ajm~>LbDCH!N@2~r1OL$gr8C%BT7&{UJ1}S~WxNX1nXMoq-N}si z2rv~safdDF-ykONBJ=KU=aC*F-zW5lR$M$)=;+QgiPw@s{36PD;{@}^^iN`R*B$*Q 
zdKDZ;%C|oV53K&0{KeLsowIL%-;wTM(dK)D5ll{Z0rJCbsuyERKz?gN{gdNL<27wb zvh+{9(giaKzeV9@P)<~cYfiu~5z?pOAHXhkQ__@nmf|Hlna8NI7JE;AJ?Wd4;Ei*7 zA+bT-kO|sj$nXh{f9eByBG(k;&8?f>@t55msc@D0*Ipg2#SKK+`kpYJ1K?lOo*ZrP zO*#fatYE9oLDoB^SlCJ~de*nYeDwsXGgI+= zng$eWV) z_O0}Zxra~ufYKQP?JzVB@6IaFl)QN~oY54`*Fiu50>C+AJk&MTqLh;H*AY9c)K3|s zqV(29k~wl~g%JTNwAng+9n?U46_YRq&eh&&OIK%(uJyKFmns~P3k8d84Ex-Rc1@~n zUFrG5LD>!}l6n@fAE$aIVJ90;_nre=s@~pc_w@WcqANGJKXsHRKEIF>p@=Ry z#Xe$;`(E-H5lPf;Gx^{`?xw;2(NFFX4JwH8M83&(=7S7nb}vt*MWPUqrRsvJPIRdy-D3LDUN#E!|XhYaWB{k z(YQ%2>pRN%>=dyhh;myRBNz;*c4)A0k?W$>j)Dn1ofSTZO1r*_BVjp1E8CEA^=^3W z3BQR#Q1v1ZVtlGK%KST}=P87Jt??IPAnMIY9&{4-47F#W1yDNts(kWvpQm`oDA0O? z`Pm&KpwNWeC%xx{_8x=aX<7QDBd+A3c0sfp8XNJ9kNb=$eTEFaPn8p?_&m9^h}y7L zxPo7EDjVoyB!WNOdAI9v`#~dcY6`h(6U1>4a?uG5ASpH>~F=0oo`LNO&Xpk&$N&=HA+upa^<`8CZ*n z*hv0J;2Tp4UB_OtH7dMMWX-hQG35UMUBQ3CGV+)m%5B!zY_qlWNnVIR?9%_j*NoNQ zrF`=}nk!I|J`z~A_})uy{lQsLUay(QALC;ss_$4=gY~B=u)SUEZRcD2_~!92o?1~; zn_$+OW-?vfk*{HnloyN@S$z)4zmF*E)2Bp-qmcPqg&15qsq2%4Bj)H(6~DK|k4@Tw zmjGo=boK1e5q=KI!sr0u$0x);?|ZCXY&aJYfk2JYe}y!0g82V{atX{l+?}jl%`DlR zU7g<9*ju@>I{U~-aQ}(df96;r0Q$AM9aq?2e%)gZ%fRm9Y%TaoI21&lw*3PBD;_v! 
zNa>4K9IX6We89sieNV%M(H|aNDm8MaasFEt1^8>6h~HY{Pnc*zYl~9Y-}RB}cKbVf zV;&E0Z;n#=f&;}4Y0`|WnsN1=!2YM-iP2^8plkE`WUFDzG224A4B7mX2LNv}hv3cq z#xE5cXa04yF#$YmNJ6zmMkw+IU$)OwTgau06y7fg&Mr~O!gNN!4VMVh+2<79niT%n zbxT^Ac#c3woJsC5dF#UkrUu1}j!9}w89p}@`Vy%nl-!|xO3jJSCrSwM_t#36VW_gh zy$2wKHtyoOgY+q_1^kT9n1D&Lz0{Vq-G+$FJQ>iZ&zv8PqVWC_Ze(eZx3Z0byif|7 zQ#aAsCFnsu5fjt?(zp`xs=!H|U}6zww2WkdoY-O%*yD~z4NaFCDc0PXjDYySO$W`6 z4OxF?Q!|$qi7uj7R68ekC#_4M^=TEZ<_fXRb_zRl_DT?g%}{pY05YSpy(aW@st+*V zIEewA!*X`!8@6j?^FuO%mzHmeZLyj=Mi`dW>vhasnBk%csISMaJ}Fb z-gIn)07tAGgU&;4ruJ*suH6Vsuo+-aDbqa|G`&-YdL1*C`F?jq7* zyJ=o+OI<>w0;}?=&Y|9EK;f<^#V`x#7+Z8VU?fX8y)AkpeiU5rIczFXy^VtP(}2F| z(9YJ+o0)!flH{jAhuLE3Hm)QmU$Rd2iIn>^rP7pIqRK~#O(<<(|4|T3k^b^bVUWNd&!qbQc_yo+mAQwt$zKdMJpbdPgg~OUyxaEy zJo1Sigm^2vd^$ZvJ6b)-$c$Wk#R*#d_vy9!_);H;lLZQ^)d-a^f(hYYh=tA?zAd$P zo1nTyC>^2G;@?fMbDik7UiXJ~Yh88U{pL=hVkh+#j{`sFJ^-AKs!rRe1+`z{9#>SB zDzY4%DjarPl{7S-K|LxOE`X6xt12}MHG+0zPg1s!;@&o?nP!^iDw}8Y&}vK37T%I; zUUIBq#oc%32lN){utSn}t49czStp(>>31G1+P14x9AUcx$XUHjb{}=jtvPiig=E)v z&QDcW*O73m#uB1eGq$uAah2N$mHyv+$i#f)M7p{vI4{uR~ z8>HUm`1E8Gr|n;RLtgVg=zjlp#GVMQB56lsh^S?fbV1FMd~$ENJekwjq|zjsKuCq&xgi~HhT0dZ*x#Y4xrhtB0c)C0BrfIA-27uajYCZHf0}P0Ol7#NI7$Qi(w@zSL##UYh-i_&lkN z1g>+tq+i2h8r~2zZ)8m|55R3iBf*OVq1Tvv0uAlSW>Q9r8yQ4tYwYXWhvmDZkPU6P z3T`-ihOH92TmcqjM<5C-FINjE{?wXe*+zNT%}E>|%XGos&<2mOh+i#Aeo2|+g|VB) zDZ}d@uZ6zE5XZ>I9BdN$G4q0Am06XJaTa1*o5$r%TXR>b_|y+x70DaFWS1j*BV=v4 zvV$I%nm^;wFF1kkHNCp%Ccj*1M^lhQK^==$~k*)6I2`Y1DH1oqKG24 z;}UWa0|tQzUouRypVVLZ-LVBFJLTO23SL$9XgcE3moSqNWw@^lL?H3=xl9)*r(YYr zmMRL~ymAJEKULe8L*XglOT4X@s%jHu>q0C01wCr1X~n}~PQtI`Hn1gp;m~B+>>xNz zBzqRWE4RbNGw|P()Ly;}WG3)&MK>HgrdfNBnw$r8)77(vIi|$b)#FTw*Fa!CHNlMt z58}TO$@4|(t6_=cgjs#*V0P&|Gh}LIxaZbaPdiz+Wn`*Iqk-qq%Cj@wJm@xFA5zP@ zQbgC=CvBF}j2 zUEUpFm}G(?%zxgzl;NJdgVCREajD*uIM5q@42>yua@jgWH*fE|+GEf?Wv#pOA(#L1 z@@gnrWWI|(m@7P-VJWvNMl~sPyn_LAfRB3fyi@~lD@rqqC9Y=r- zxx4$2Fb(2Gq#?g#UXy0V3kgSCn}=0-dtAu?=$<;$AB=je^A!a^IC1GhIV3oOKc|KQ z+>ge8Qug`@RI2qGSndrPE+Ef&d|UobZ92mI>y&WjE9>fM!;4#mF{6d{Eh91#xySU^ 
zTZ;W{1MjZcrN30xNm?YRwbvi^T6!gk@?*gVhGvp(vlBqaNu_B|+s7t;J7`fd;@NFWK zY{2*ha|OSvwz}G+NiG)PF1I6tfA1l?SJN%8Ie{>@f&3PJx!A=42@~J;4mf->vU1p_ zAQ!kllW6a}X#Hd>lu9@k@97@*n23T9@y^j%zTANx@!Xx>d7RL06dw?2#!uDN0}7Ly z3(uKiJ0h;7_4D1lP4be}FMaOyb387ry)Fq3@O%@E^B1PU$@^%&kdBP-Ytb+-9W=$C zqKa^rcBvxm2 zcCWKjNBG*96UFrc^vO%43E8X@)zdTsb+xUT;GRRCbVjlVT#3${3Jpu8ToKMrWG?{1 zrKjT8H*GW{Gc!cHTkgg!uXnHM=ax!?1(0v6h%7c)(0;DN{23v};pPZ)Ka;$fV*W)l z_XfoL=Q{kHwgn0OAA>`Ne}OrX`7G?s+}tQl-db5WxmwD2I9j;dI5`TSP*Pf$IlFn- zTTu#7O8%>c@<)HFIh8r{Gou{yA7Z*^4!QrRC;VUP|MwOC--aMT9L!3JAPaXVSBv+o zwl?mpCeF{CfA&@=<{mcomL@ii?)Elr?kFyBuZ&(ETT2tZhnF$ZE;G*I2SLo5s{2PZ#22Rj!h2QLQ)H|Lu-Je=%Y?A-j^ zyl;4TczAg^xH#F_dH&+{^Vv5phg3TeLeQOwAzMzk5HT2(xG4mPqB9d*{4-vfB_`?)PNcMMPAP`9AACiT{|639XB1(V)QF3L1Tj2h){*N2t zK`Nh@#d!X;EX+XxA$DVe1M&X59SUU34HM$$hDr7}4jBXz{)ZJ=;r}ho1X*+^hKrW} zvyO%3U;733`H=nVnEed`0fBh_p+!>hPYd2>`+J*@GmV`|3?c% z)D;i%$peY(KmGRE=9_b3v? From 610aa61c2e14ec029fbefd9cd7e95b276c657783 Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Wed, 30 Oct 2024 18:21:26 +0100 Subject: [PATCH 13/15] Better device check --- src/KOKKOS/pair_metatensor_kokkos.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp index c51d90de88e..3fd275f9a5f 100644 --- a/src/KOKKOS/pair_metatensor_kokkos.cpp +++ b/src/KOKKOS/pair_metatensor_kokkos.cpp @@ -340,14 +340,16 @@ void PairMetatensorKokkos::settings(int argc, char ** argv) { mts_data->model->to(mts_data->device); // Handle potential mismatch between Kokkos and model devices - if (std::is_same::value) { + if (std::is_same_v) { if (!mts_data->device.is_cuda()) { throw std::runtime_error("Kokkos is running on a GPU, but the model is not on a GPU"); } - } else { + } else if (std::is_same_v) { if (!mts_data->device.is_cpu()) { throw std::runtime_error("Kokkos is running on CPU, but the model is not on CPU"); } + } else { + throw std::runtime_error("This kokkos device is not supported by the metatensor 
kokkos pair style. Only CUDA and CPU are supported."); } auto message = "Running simulation on " + mts_data->device.str() + " device with " + mts_data->capabilities->dtype() + " data"; From 82aec56073ef624c6f79ceee30018267d2505376 Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Wed, 30 Oct 2024 18:34:38 +0100 Subject: [PATCH 14/15] Remove readme, add basic installation line to docs, make example consistent with non-kokkos example --- doc/src/Build_extras.rst | 11 +++++++++++ examples/PACKAGES/metatensor/in.kokkos.metatensor | 10 +++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index 5b39dde101d..f3f1a1937f6 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -1049,6 +1049,17 @@ https://pytorch.org/get-started/locally/. make yes-metatensor make + .. tab:: Metatensor and Kokkos + + The metatensor-kokkos interface should be compiled as + + .. code-block:: bash + + cmake ../cmake/ -DPKG_KOKKOS=ON -DKokkos_ENABLE_CUDA=ON -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../libtorch/share/cmake/ + + where ``/.../libtorch/`` is the path to a libtorch C++11 ABI distribution (which can be downloaded from https://pytorch.org/get-started/locally/). + The OpenMP version (as opposed to the CUDA version) can be enabled with -DKokkos_ENABLE_OPENMP=ON instead of -DKokkos_ENABLE_CUDA=ON + ---------- .. 
_opt: diff --git a/examples/PACKAGES/metatensor/in.kokkos.metatensor b/examples/PACKAGES/metatensor/in.kokkos.metatensor index 39a1cf644b0..1a4b9b59494 100644 --- a/examples/PACKAGES/metatensor/in.kokkos.metatensor +++ b/examples/PACKAGES/metatensor/in.kokkos.metatensor @@ -3,7 +3,7 @@ boundary p p p atom_style atomic/kk lattice fcc 3.6 -region box block 0 8 0 8 0 8 +region box block 0 2 0 2 0 2 create_box 1 box create_atoms 1 box @@ -18,11 +18,11 @@ pair_style metatensor/kk nickel-lj.pt device cuda check_consistency off pair_coeff * * 28 timestep 0.001 -fix 1 all nve +fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0 -thermo 100 -thermo_style custom step temp pe etotal press vol cpu +thermo 10 +thermo_style custom step temp pe etotal press vol # dump 1 all atom 10 dump.metatensor -run 1000 +run 100 From 033082e7941b83c9ba148f5aecdf21d19510d756 Mon Sep 17 00:00:00 2001 From: frostedoyster Date: Wed, 30 Oct 2024 18:53:14 +0100 Subject: [PATCH 15/15] Check equality of int types --- src/KOKKOS/metatensor_system_kokkos.cpp | 1 - src/KOKKOS/pair_metatensor_kokkos.cpp | 5 ++++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp index 8eccab9c71c..5b5daac72dc 100644 --- a/src/KOKKOS/metatensor_system_kokkos.cpp +++ b/src/KOKKOS/metatensor_system_kokkos.cpp @@ -28,7 +28,6 @@ #include "atom_kokkos.h" #include -#include #ifndef KOKKOS_ENABLE_CUDA // fake Kokkos::Cuda for non-CUDA builds diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp index 3fd275f9a5f..611d0529396 100644 --- a/src/KOKKOS/pair_metatensor_kokkos.cpp +++ b/src/KOKKOS/pair_metatensor_kokkos.cpp @@ -47,7 +47,10 @@ #include "metatensor_system_kokkos.h" -#include +// LAMMPS uses `LAMMPS_NS::tagint` and `int` for tags and neighbor lists, respectively. 
+// For the moment, we require both to be int32_t for this interface +static_assert(std::is_same_v, "Error: LAMMPS_NS::tagint must be int32_t to compile metatensor/kk"); +static_assert(std::is_same_v, "Error: int must be int32_t to compile metatensor/kk"); #ifndef KOKKOS_ENABLE_CUDA namespace Kokkos {