From aa30355ece9ccd066495300c47be38f692122703 Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Mon, 15 Jul 2024 15:31:07 +0200
Subject: [PATCH 01/15] Experimental kokkos interface

---
 cmake/Modules/Packages/KOKKOS.cmake           |   5 +
 cmake/Modules/Packages/ML-METATENSOR.cmake    |  16 +-
 .../PACKAGES/metatensor/in.kokkos.metatensor  |  28 +
 src/KOKKOS/metatensor_system_kokkos.cpp       | 390 +++++++++++
 src/KOKKOS/metatensor_system_kokkos.h         | 141 ++++
 src/KOKKOS/pair_metatensor_kokkos.cpp         | 637 ++++++++++++++++++
 src/KOKKOS/pair_metatensor_kokkos.h           |  59 ++
 src/ML-METATENSOR/pair_metatensor.cpp         |   2 +-
 8 files changed, 1269 insertions(+), 9 deletions(-)
 create mode 100644 examples/PACKAGES/metatensor/in.kokkos.metatensor
 create mode 100644 src/KOKKOS/metatensor_system_kokkos.cpp
 create mode 100644 src/KOKKOS/metatensor_system_kokkos.h
 create mode 100644 src/KOKKOS/pair_metatensor_kokkos.cpp
 create mode 100644 src/KOKKOS/pair_metatensor_kokkos.h

diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake
index 3776d18a3e1..d462ce26385 100644
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@@ -182,6 +182,11 @@ if(PKG_ML-IAP)
   endif()
 endif()
 
+if(PKG_ML-METATENSOR)
+  # Compile the metatensor system adaptor with KOKKOS only when ML-METATENSOR is enabled. NOTE(review): pair_metatensor_kokkos.cpp is not listed here; presumably it is picked up elsewhere -- confirm.
+  list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/metatensor_system_kokkos.cpp)
+endif()
+
 if(PKG_PHONON)
   list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/dynamical_matrix_kokkos.cpp)
   list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/third_order_kokkos.cpp)
diff --git a/cmake/Modules/Packages/ML-METATENSOR.cmake b/cmake/Modules/Packages/ML-METATENSOR.cmake
index ca021cc7051..3aa7057b5dd 100644
--- a/cmake/Modules/Packages/ML-METATENSOR.cmake
+++ b/cmake/Modules/Packages/ML-METATENSOR.cmake
@@ -4,14 +4,14 @@ if(CMAKE_CXX_STANDARD LESS 17)
 be set to at least C++17")
 endif()
 
-if (BUILD_OMP AND APPLE)
-    message(FATAL_ERROR
-        "Can not enable both BUILD_OMP and PGK_ML-METATENSOR on Apple systems, "
-        "since this results in two different versions of libiomp5.dylib (one "
-        "from the system and one from Torch) being linked to the final "
-        "executable, which then segfaults"
-    )
-endif()
+# Known hazard kept visible as a warning (downgraded from FATAL_ERROR) instead of being silently disabled.
+if (BUILD_OMP AND APPLE)
+    message(WARNING
+        "Enabling both BUILD_OMP and PKG_ML-METATENSOR on Apple systems can link two different "
+        "versions of libiomp5.dylib (one from the system and one from Torch) into the final "
+        "executable, which then segfaults"
+    )
+endif()
 
 # Bring the `torch` target in scope to allow evaluation
 # of cmake generator expression from `metatensor_torch`
diff --git a/examples/PACKAGES/metatensor/in.kokkos.metatensor b/examples/PACKAGES/metatensor/in.kokkos.metatensor
new file mode 100644
index 00000000000..78075759c40
--- /dev/null
+++ b/examples/PACKAGES/metatensor/in.kokkos.metatensor
@@ -0,0 +1,28 @@
+units metal
+boundary p p p
+
+atom_style atomic/kk
+lattice fcc 3.6
+region box block 0 2 0 2 0 2
+create_box 1 box
+create_atoms 1 box
+
+# labelmap atom 1 Ni  # disabled: this command does not work when running with the KOKKOS package
+mass 1 58.693
+
+velocity all create 123 42
+
+run_style verlet/kk
+
+pair_style metatensor/kk nickel-lj.pt device cuda
+pair_coeff * * 28
+
+timestep 0.001
+fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(100 * dt)
+
+thermo 1
+thermo_style custom step temp pe etotal press vol
+
+# dump 1 all atom 10 dump.metatensor
+
+run 100
diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
new file mode 100644
index 00000000000..e4fc076e04f
--- /dev/null
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -0,0 +1,390 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Guillaume Fraux <guillaume.fraux@epfl.ch>
+------------------------------------------------------------------------- */
+#include "metatensor_system_kokkos.h"
+
+#include "atom.h"
+#include "domain.h"
+#include "error.h"
+#include "neighbor.h"
+
+#include "neigh_list.h"
+#include "neigh_request.h"
+
+#include "kokkos.h"
+#include "atom_kokkos.h"
+
+#ifndef KOKKOS_ENABLE_CUDA
+namespace Kokkos {
+class Cuda {};  // empty stand-in, only defined when KOKKOS_ENABLE_CUDA is not; lets the std::is_same<LMPDeviceType, Kokkos::Cuda> checks below compile
+} // namespace Kokkos
+#endif // KOKKOS_ENABLE_CUDA
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+template<class LMPDeviceType>
+MetatensorSystemAdaptorKokkos<LMPDeviceType>::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Pair* requestor, MetatensorSystemOptionsKokkos<LMPDeviceType> options):
+    Pointers(lmp),
+    options_(std::move(options)),
+    list_(nullptr),
+    caches_(),
+    atomic_types_(torch::zeros({0}, torch::TensorOptions().dtype(torch::kInt32)))
+{
+    // The strain tensor lives on the torch device matching the Kokkos
+    // device type this adaptor is instantiated for: CUDA when that type is
+    // Kokkos::Cuda, CPU for everything else.
+    constexpr bool on_cuda = std::is_same<LMPDeviceType, Kokkos::Cuda>::value;
+    torch::Device strain_device = on_cuda ? torch::kCUDA : torch::kCPU;
+    auto strain_options = torch::TensorOptions().dtype(torch::kFloat64).device(strain_device).requires_grad(true);
+
+    // We ask LAMMPS for a full neighbor lists because we need to know about
+    // ALL pairs, even if options->full_list() is false. We will then filter
+    // the pairs to only include each pair once where needed.
+    auto nl_request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
+    nl_request->set_id(0);
+    nl_request->set_cutoff(options_.interaction_range);
+
+    this->strain = torch::eye(3, strain_options);
+}
+
+template<class LMPDeviceType>
+MetatensorSystemAdaptorKokkos<LMPDeviceType>::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Compute* requestor, MetatensorSystemOptionsKokkos<LMPDeviceType> options):
+    Pointers(lmp),
+    options_(std::move(options)),
+    list_(nullptr),
+    caches_(),
+    atomic_types_(torch::zeros({0}, torch::TensorOptions().dtype(torch::kInt32)))
+{
+    // Same setup as the Pair-requestor constructor, with a Compute as the
+    // owner of the neighbor list request.
+    constexpr bool on_cuda = std::is_same<LMPDeviceType, Kokkos::Cuda>::value;
+    torch::Device strain_device = on_cuda ? torch::kCUDA : torch::kCPU;
+    auto strain_options = torch::TensorOptions().dtype(torch::kFloat64).device(strain_device).requires_grad(true);
+
+    // full list (REQ_FULL | REQ_GHOST), cut at the model interaction range
+    auto nl_request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
+    nl_request->set_id(0);
+    nl_request->set_cutoff(options_.interaction_range);
+
+    this->strain = torch::eye(3, strain_options);
+}
+
+template<class LMPDeviceType>
+MetatensorSystemAdaptorKokkos<LMPDeviceType>::~MetatensorSystemAdaptorKokkos() {
+    // intentionally empty: no explicit cleanup is performed here (list_ is not deleted)
+}
+
+template<class LMPDeviceType>
+void MetatensorSystemAdaptorKokkos<LMPDeviceType>::init_list(int id, NeighList* ptr) {  // remember the LAMMPS neighbor list to translate
+    assert(id == 0);  // the constructors only create one request, with id 0
+    list_ = ptr;  // stored as-is; read later by setup_neighbors()
+}
+
+template<class LMPDeviceType>
+void MetatensorSystemAdaptorKokkos<LMPDeviceType>::add_nl_request(double cutoff, metatensor_torch::NeighborListOptions request) {  // validate `cutoff`, then append one entry to caches_
+    if (cutoff > options_.interaction_range) {  // interaction_range is the cutoff set on the LAMMPS list request in the constructors
+        error->all(FLERR,
+            "Invalid metatensor model: one of the requested neighbor lists "
+            "has a cutoff ({}) larger than the model interaction range ({})",
+            cutoff, options_.interaction_range
+        );
+    } else if (cutoff < 0 || !std::isfinite(cutoff)) {  // negative or non-finite cutoff
+        error->all(FLERR,
+            "model requested an invalid cutoff for neighbors list: {} "
+            "(cutoff in model units is {})",
+            cutoff, request->cutoff()
+        );
+    }
+
+    caches_.push_back({  // one MetatensorNeighborsDataKokkos; the four cache containers start empty
+        cutoff,
+        request,
+        /*known_samples = */ {},
+        /*samples = */ {},
+        /*distances_f64 = */ {},
+        /*distances_f32 = */ {},
+    });
+}
+
+
+// Build the neighbor lists requested by the model (one per entry in `caches_`)
+// from the LAMMPS full list in `list_`, and register them on `system`.
+// Ghost atoms are mapped back to a single "original" atom per tag; the offset
+// between a ghost and its original atom is stored as an integer cell shift.
+template<class LMPDeviceType>
+void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors(metatensor_torch::System& system) {
+    auto dtype = system->positions().scalar_type();
+    auto device = system->positions().device();
+
+    auto positions_kokkos = this->atomKK->k_x. template view<LMPDeviceType>();
+    auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
+
+    auto cell_inv_tensor = system->cell().inverse().t().to(device).to(torch::kFloat64);
+    // it might be a good idea to have this as float32 if the model is using float32
+    // to speed up the computation, especially on GPU
+
+    /*-------------- this part is done on the CPU for now ------------------------*/
+    // Collect the local atom id of all local & ghosts atoms, mapping ghosts
+    // atoms which are periodic images of local atoms back to the local atoms.
+    //
+    // Metatensor expects pairs corresponding to periodic atoms to be between
+    // the main atoms, but using the actual distance vector between the atom and
+    // the ghost.
+    original_atom_id_.clear();
+    original_atom_id_.reserve(total_n_atoms);
+
+    // identify all local atom by their LAMMPS atom tag.
+    local_atoms_tags_.clear();
+    for (int i=0; i<atom->nlocal; i++) {
+        original_atom_id_.emplace_back(i);
+        local_atoms_tags_.emplace(atom->tag[i], i);
+    }
+
+    // now loop over ghosts & map them back to the main cell if needed
+    ghost_atoms_tags_.clear();
+    for (int i=atom->nlocal; i<total_n_atoms; i++) {
+        auto tag = atom->tag[i];
+        auto local_it = local_atoms_tags_.find(tag);
+        if (local_it != local_atoms_tags_.end()) {
+            // this is the periodic image of an atom already owned by this domain
+            original_atom_id_.emplace_back(local_it->second);
+        } else {
+            // this can either be a periodic image of an atom owned by another
+            // domain, or directly an atom from another domain. Since we can not
+            // really distinguish between these, we take the first atom as the
+            // "main" one and remap all atoms with the same tag to the first one
+            auto ghost_it = ghost_atoms_tags_.find(tag);
+            if (ghost_it != ghost_atoms_tags_.end()) {
+                // we already found this atom elsewhere in the system
+                original_atom_id_.emplace_back(ghost_it->second);
+            } else {
+                // this is the first time we are seeing this atom
+                original_atom_id_.emplace_back(i);
+                ghost_atoms_tags_.emplace(tag, i);
+            }
+        }
+    }
+    /*----------- end of the part done on the CPU for now --------------*/
+
+    auto original_atom_id_tensor = torch::from_blob(
+        original_atom_id_.data(),
+        {total_n_atoms},
+        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
+    );
+    original_atom_id_tensor = original_atom_id_tensor.to(device);  // host -> model device copy
+
+    // Accumulate total number of pairs
+    int total_number_of_pairs = 0;
+    for (int ii=0; ii<(list_->inum + list_->gnum); ii++) {
+        total_number_of_pairs += list_->numneigh[ii];
+    }
+    std::vector<int> centers(total_number_of_pairs);
+    std::vector<int> neighbors(total_number_of_pairs);
+
+    // Fill the centers and neighbors arrays with the LAMMPS atom ids
+    int pair_index = 0;
+    for (int ii=0; ii<(list_->inum + list_->gnum); ii++) {
+        auto atom_i = list_->ilist[ii];
+        auto neighbors_ii = list_->firstneigh[ii];
+        for (int jj=0; jj<list_->numneigh[ii]; jj++) {
+            centers[pair_index] = atom_i;
+            neighbors[pair_index] = neighbors_ii[jj];
+            pair_index++;
+        }
+    }
+
+    // The vectors above are host memory: wrap them as CPU tensors, then move them to the model device
+    auto centers_tensor = torch::from_blob(
+        centers.data(),
+        {total_number_of_pairs},
+        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
+    ).to(device);
+    auto neighbors_tensor = torch::from_blob(
+        neighbors.data(),
+        {total_number_of_pairs},
+        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
+    ).to(device);
+
+    // change centers and neighbors to the original atom ids
+    auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor);
+    auto neighbors_tensor_original_id = original_atom_id_tensor.index_select(0, neighbors_tensor);
+
+    // create torch tensor with the positions (TEMPORARY, TODO: change)
+    auto positions_tensor = torch::from_blob(
+        positions_kokkos.data(),
+        {total_n_atoms, 3},
+        torch::TensorOptions().dtype(torch::kFloat64).device(device)
+    );
+
+    for (auto& cache: caches_) {
+        // per-cache copies, so that half-list filtering does not leak into the next requested list
+        auto cache_centers = centers_tensor;
+        auto cache_neighbors = neighbors_tensor;
+        auto cache_centers_original_id = centers_tensor_original_id;
+        auto cache_neighbors_original_id = neighbors_tensor_original_id;
+        auto full_list = cache.options->full_list();
+        if (!full_list) {
+            auto half_list_mask = cache_centers_original_id <= cache_neighbors_original_id;
+            cache_centers = cache_centers.masked_select(half_list_mask);
+            cache_neighbors = cache_neighbors.masked_select(half_list_mask);
+            cache_centers_original_id = cache_centers_original_id.masked_select(half_list_mask);
+            cache_neighbors_original_id = cache_neighbors_original_id.masked_select(half_list_mask);
+        }
+
+        // distance mask
+        auto interatomic_vectors = positions_tensor.index_select(0, cache_neighbors) - positions_tensor.index_select(0, cache_centers);
+        auto distance_mask = torch::sum(interatomic_vectors.pow(2), 1) < cache.cutoff*cache.cutoff;
+
+        // index everything with the mask
+        auto centers_tensor_original_id_filtered = cache_centers_original_id.masked_select(distance_mask);
+        auto neighbors_tensor_original_id_filtered = cache_neighbors_original_id.masked_select(distance_mask);
+        auto interatomic_vectors_filtered = interatomic_vectors.index({distance_mask, torch::indexing::Slice()});
+
+        // find filtered interatomic vectors using the original atoms
+        auto interatomic_vectors_original_filtered = positions_tensor.index_select(0, neighbors_tensor_original_id_filtered) - positions_tensor.index_select(0, centers_tensor_original_id_filtered);
+
+        // cell shifts
+        auto pair_shifts = interatomic_vectors_filtered - interatomic_vectors_original_filtered;
+        auto cell_shifts = pair_shifts.matmul(cell_inv_tensor);
+        cell_shifts = torch::round(cell_shifts).to(torch::kInt32);
+
+        if (!full_list) {
+            auto half_list_cell_mask = centers_tensor_original_id_filtered == neighbors_tensor_original_id_filtered;
+            auto shift_sum = torch::sum(cell_shifts, 1);
+            auto negative_half_space_mask = shift_sum < 0;
+            // tensor form of: (a + b + c == 0) && (c < 0 || (c == 0 && b < 0)) for a shift [a, b, c]
+            auto shift_b = cell_shifts.index({torch::indexing::Slice(), 1});
+            auto shift_c = cell_shifts.index({torch::indexing::Slice(), 2});
+            auto edge_mask = (shift_sum == 0) & ((shift_c < 0) | ((shift_c == 0) & (shift_b < 0)));
+            auto final_mask = torch::logical_not(half_list_cell_mask & (negative_half_space_mask | edge_mask));
+            centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.masked_select(final_mask);
+            neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.masked_select(final_mask);
+            interatomic_vectors_filtered = interatomic_vectors_filtered.index({final_mask, torch::indexing::Slice()});
+            cell_shifts = cell_shifts.index({final_mask, torch::indexing::Slice()});
+        }
+
+        centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.unsqueeze(-1);
+        neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.unsqueeze(-1);
+        auto samples_values = torch::concatenate({centers_tensor_original_id_filtered, neighbors_tensor_original_id_filtered, cell_shifts}, 1);
+
+        auto [samples_values_unique, samples_inverse, _] = torch::unique_dim(
+            samples_values, /*dim=*/0, /*sorted=*/true, /*return_inverse=*/true, /*return_counts=*/false
+        );
+
+        auto permutation = torch::arange(samples_inverse.size(0), samples_inverse.options());
+        samples_inverse = samples_inverse.flip({0});
+        permutation = permutation.flip({0});
+
+        auto sample_indices = torch::empty(samples_values_unique.size(0), samples_inverse.options());
+        sample_indices.scatter_(0, samples_inverse, permutation);  // pick one source row per unique sample
+
+        auto samples = torch::make_intrusive<metatensor_torch::LabelsHolder>(
+            std::vector<std::string>{"first_atom", "second_atom", "cell_shift_a", "cell_shift_b", "cell_shift_c"},
+            samples_values_unique
+        );
+
+        auto neighbor_list = torch::make_intrusive<metatensor_torch::TensorBlockHolder>(
+            interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1).to(dtype).to(device),
+            samples->to(device),
+            std::vector<metatensor_torch::TorchLabels>{
+                metatensor_torch::LabelsHolder::create({"xyz"}, {{0}, {1}, {2}})->to(device),
+            },
+            metatensor_torch::LabelsHolder::create({"distance"}, {{0}})->to(device)
+        );
+
+        metatensor_torch::register_autograd_neighbors(system, neighbor_list, options_.check_consistency);
+        system->add_neighbor_list(cache.options, neighbor_list);
+    }
+}
+
+
+template<class LMPDeviceType>
+metatensor_torch::System MetatensorSystemAdaptorKokkos<LMPDeviceType>::system_from_lmp(
+    bool do_virial,
+    torch::ScalarType dtype,
+    torch::Device device
+) {
+    // Build a metatensor System (types, positions, cell, neighbor lists) from the current LAMMPS data, in `dtype` on `device`.
+    auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
+
+    auto atom_types_lammps_kokkos = atomKK->k_type.view<LMPDeviceType>();
+    auto mapping = options_.types_mapping_kokkos;
+    Kokkos::View<int32_t*, Kokkos::LayoutRight, LMPDeviceType> atom_types_metatensor_kokkos("atom_types_metatensor", total_n_atoms);   /// Can be a class member? (allocation alert)
+
+    Kokkos::parallel_for(
+        "MetatensorSystemAdaptorKokkos::system_from_lmp::atom_types_mapping",
+        Kokkos::RangePolicy<LMPDeviceType>(0, total_n_atoms),  // run where the views above live
+        KOKKOS_LAMBDA(int i)
+    {
+        atom_types_metatensor_kokkos(i) = mapping(atom_types_lammps_kokkos(i));
+    });
+
+    atomic_types_ = torch::from_blob(
+        atom_types_metatensor_kokkos.data(),
+        {total_n_atoms},
+        torch::TensorOptions().dtype(torch::kInt32).device(device)
+    ).clone();  /// Again, allocation alert. Not sure if this can be avoided
+
+    auto tensor_options = torch::TensorOptions().dtype(torch::kFloat64).device(device);
+
+    // atom->x contains "real" and then ghost atoms, in that order
+    auto positions_kokkos = atomKK->k_x.view<LMPDeviceType>();
+    this->positions = torch::from_blob(
+        positions_kokkos.data(), {total_n_atoms, 3},
+        // requires_grad=true since we always need gradients w.r.t. positions
+        tensor_options
+    ).clone().requires_grad_(true);  /// Allocation alert (clone)
+
+    auto cell = torch::zeros({3, 3}, tensor_options);  /// Allocation alert, we could make it a class member and allocate it once
+    /// domain doesn't seem to have a Kokkos version
+    cell[0][0] = domain->xprd;
+
+    cell[1][0] = domain->xy;
+    cell[1][1] = domain->yprd;
+
+    cell[2][0] = domain->xz;
+    cell[2][1] = domain->yz;
+    cell[2][2] = domain->zprd;
+    /// the remaining (upper-triangular) entries keep the 0 they got from torch::zeros
+
+    auto system_positions = this->positions.to(dtype);  // model dtype; `this->positions` stays float64 and still receives the gradient
+    cell = cell.to(dtype).to(device);   /// to(device) alert. How do we find the cell on Kokkos?
+
+    if (do_virial) {
+        auto model_strain = this->strain.to(dtype);  /// already on the correct device
+
+        // pretend to scale positions/cell by the strain so that
+        // it enters the computational graph.
+        system_positions = system_positions.matmul(model_strain);
+        cell = cell.matmul(model_strain);
+    }
+
+    auto system = torch::make_intrusive<metatensor_torch::SystemHolder>(
+        atomic_types_,
+        system_positions,
+        cell
+    );
+
+    this->setup_neighbors(system);
+    return system;
+}
+
+namespace LAMMPS_NS {
+template class MetatensorNeighborsDataKokkos<LMPDeviceType>;  // explicit instantiations; LMPDeviceType here is a concrete type declared outside this file
+template class MetatensorSystemAdaptorKokkos<LMPDeviceType>;  // NOTE(review): only this one device type is instantiated -- confirm no host-type instance is needed
+}
diff --git a/src/KOKKOS/metatensor_system_kokkos.h b/src/KOKKOS/metatensor_system_kokkos.h
new file mode 100644
index 00000000000..3a16384c379
--- /dev/null
+++ b/src/KOKKOS/metatensor_system_kokkos.h
@@ -0,0 +1,141 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_METATENSOR_SYSTEM_KOKKOS_H
+#define LMP_METATENSOR_SYSTEM_KOKKOS_H
+
+#include <array>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "pointers.h"
+#include "pair.h"
+#include "neigh_list.h"
+#include "kokkos.h"
+
+#include <metatensor/torch/atomistic.hpp>
+
+namespace LAMMPS_NS {
+
+template<class LMPDeviceType>
+struct MetatensorSystemOptionsKokkos {  // options handed to MetatensorSystemAdaptorKokkos at construction
+    // Mapping from LAMMPS types to metatensor types (raw pointer form)
+    const int32_t* types_mapping;
+    const Kokkos::View<int32_t*, Kokkos::LayoutRight, LMPDeviceType> types_mapping_kokkos;  // same mapping as a Kokkos view; indexed by LAMMPS type in system_from_lmp()
+    // interaction range of the model, in LAMMPS units; used as cutoff of the LAMMPS neighbor list
+    double interaction_range;
+    // should we run extra checks on the neighbor lists?
+    bool check_consistency;
+};
+
+// data for one neighbors list requested by the model (one instance per add_nl_request() call)
+template<class LMPDeviceType>
+struct MetatensorNeighborsDataKokkos {
+    // single neighbors sample containing [i, j, S_a, S_b, S_c]
+    using sample_t = std::array<int32_t, 5>;
+
+    struct SampleHasher {  // hash functor so that sample_t can be stored in std::unordered_set
+        static void hash_combine(std::size_t& seed, const int32_t& v) {
+            seed ^= std::hash<int32_t>()(v) + 0x9e3779b9 + (seed<<6) + (seed>>2);  // same mixing formula as boost::hash_combine
+        }
+
+        size_t operator()(const sample_t& s) const {
+            size_t hash = 0;
+            hash_combine(hash, s[0]);  // fold the five entries in order
+            hash_combine(hash, s[1]);
+            hash_combine(hash, s[2]);
+            hash_combine(hash, s[3]);
+            hash_combine(hash, s[4]);
+            return hash;
+        }
+    };
+
+    // cutoff for this NL in LAMMPS units
+    double cutoff;
+    // options of the NL as requested by the model
+    metatensor_torch::NeighborListOptions options;
+
+    // Below are cached allocations for the LAMMPS -> metatensor NL translation
+    // TODO: report memory usage for these?
+
+    // we keep the set of samples twice: once in `known_samples` to remove
+    // duplicated pairs, and once in `samples` in a format that can be
+    // used to create a torch::Tensor.
+    std::unordered_set<sample_t, SampleHasher> known_samples;
+    std::vector<sample_t> samples;
+    // pairs distances vectors
+    std::vector<std::array<double, 3>> distances_f64;  // NOTE(review): setup_neighbors() in metatensor_system_kokkos.cpp only reads `cutoff` and `options`; these four caches look unused there -- confirm
+    std::vector<std::array<float, 3>> distances_f32;
+};
+
+template<class LMPDeviceType>
+class MetatensorSystemAdaptorKokkos : public Pointers {  // turns LAMMPS (Kokkos) atom data into a metatensor_torch::System
+public:
+    MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Pair* requestor, MetatensorSystemOptionsKokkos<LMPDeviceType> options);  // both constructors add a neighbor list request on behalf of `requestor`
+    MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Compute* requestor, MetatensorSystemOptionsKokkos<LMPDeviceType> options);
+
+    ~MetatensorSystemAdaptorKokkos();
+
+    void init_list(int id, NeighList* ptr);  // store the LAMMPS neighbor list; `id` is asserted to be 0
+
+    // Register one neighbors list requested by the model; errors out if `cutoff` is negative, non-finite or above the interaction range
+    void add_nl_request(double cutoff, metatensor_torch::NeighborListOptions request);
+
+    // Create a metatensor system matching the LAMMPS system data
+    metatensor_torch::System system_from_lmp(bool do_virial, torch::ScalarType dtype, torch::Device device);
+
+    // Explicit strain for virial calculations. This uses the same dtype/device
+    // as LAMMPS data (positions, …)
+    torch::Tensor strain;
+    // keep the positions as coming from LAMMPS (before any dtype/device
+    // conversion) to access its gradient
+    torch::Tensor positions;
+
+private:
+    // setup the metatensor neighbors list from the internal LAMMPS one
+    void setup_neighbors(metatensor_torch::System& system);
+
+    // options for this system adaptor
+    MetatensorSystemOptionsKokkos<LMPDeviceType> options_;
+
+    // LAMMPS NL (non-owning: set by init_list(), never deleted by this class)
+    NeighList* list_;
+    // allocations caches for all the NL requested by
+    // the model
+    std::vector<MetatensorNeighborsDataKokkos<LMPDeviceType>> caches_;
+    // allocation cache for the atomic types in the system
+    torch::Tensor atomic_types_;
+    // allocation cache holding the "original atom" id for all atoms in the
+    // system. This is the same as the atom id for all local atoms. For ghost
+    // atoms, this is either the id of the corresponding local atom if the ghost
+    // is a periodic image of a local atom, the id of the first ghost we found
+    // with a given atom tag if the ghost is a periodic image of another ghost;
+    // or the id of the ghost in all other cases.
+    std::vector<int> original_atom_id_;
+    // allocation cache holding the map from atom tag to atom id for local
+    // atoms.
+    std::unordered_map<tagint, int> local_atoms_tags_;
+    // allocation cache holding the map from atom tag to atom id for ghost
+    // atoms. When there are multiple periodic images of the same atom, only one
+    // will be included here.
+    std::unordered_map<tagint, int> ghost_atoms_tags_;
+
+    // TODO: should we use LAMMPS allocations/deallocation facilities for the
+    // allocation caches? If we don't, should we report memory usage from the
+    // allocations caches to LAMMPS one way or another?
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp
new file mode 100644
index 00000000000..51f7f92fc11
--- /dev/null
+++ b/src/KOKKOS/pair_metatensor_kokkos.cpp
@@ -0,0 +1,637 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing authors: Guillaume Fraux <guillaume.fraux@epfl.ch>
+------------------------------------------------------------------------- */
+#include "pair_metatensor_kokkos.h"
+
+#include "atom.h"
+#include "error.h"
+#include "force.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "update.h"
+#include "citeme.h"
+#include "comm.h"
+
+#include "neigh_list.h"
+
+#include "kokkos.h"
+#include "atom_kokkos.h"
+#include "pair_kokkos.h"
+#include "atom_masks.h"
+
+#include <torch/version.h>
+#include <torch/script.h>
+#include <torch/cuda.h>
+
+#if TORCH_VERSION_MAJOR >= 2
+    #include <torch/mps.h>
+#endif
+
+#include <memory>
+
+#include <metatensor/torch.hpp>
+#include <metatensor/torch/atomistic.hpp>
+
+#include "metatensor_system_kokkos.h"
+
+#ifndef KOKKOS_ENABLE_CUDA
+namespace Kokkos {
+class Cuda {};  // empty stand-in, only defined when KOKKOS_ENABLE_CUDA is not defined, so the name Kokkos::Cuda can still be spelled
+} // namespace Kokkos
+#endif // KOKKOS_ENABLE_CUDA
+
+using namespace LAMMPS_NS;
+
+struct LAMMPS_NS::PairMetatensorDataKokkos {  // torch/metatensor state owned by PairMetatensorKokkos (held there as `mts_data`)
+    PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit);  // sets up evaluation options requesting an "energy" output in these units
+
+    void load_model(LAMMPS* lmp, const char* path, const char* extensions_directory);  // may only be called once; `extensions_directory` may be nullptr
+
+    // torch model in metatensor format (null until load_model() succeeds)
+    std::unique_ptr<torch::jit::Module> model;
+    // device to use for the calculations (initialized to CPU by the constructor)
+    torch::Device device;
+    // model capabilities, declared by the model
+    metatensor_torch::ModelCapabilities capabilities;
+    // run-time evaluation options, decided by this class
+    metatensor_torch::ModelEvaluationOptions evaluation_options;
+    // should metatensor check the data LAMMPS send to the model
+    // and the data the model returns?
+    bool check_consistency;
+    // how far away the model needs to know about neighbors (-1 until set)
+    double interaction_range;
+
+    // allocation cache for the selected atoms
+    torch::Tensor selected_atoms_values;
+    // adaptor from LAMMPS system to metatensor's
+    std::unique_ptr<MetatensorSystemAdaptorKokkos<LMPDeviceType>> system_adaptor;
+};
+
+PairMetatensorDataKokkos::PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit):
+    device(torch::kCPU),
+    // default to true for now, this will be changed to false later
+    check_consistency(true),
+    interaction_range(-1),
+    system_adaptor(nullptr)
+{
+    // empty (0 x 2) int32 tensor as initial value for the selected atoms
+    this->selected_atoms_values = torch::zeros({0, 2}, torch::TensorOptions().dtype(torch::kInt32));
+
+    // The single output requested from the model: the total (not per-atom)
+    // energy, in the given energy unit, without explicit gradients.
+    auto energy_output = torch::make_intrusive<metatensor_torch::ModelOutputHolder>();
+    energy_output->set_quantity("energy");
+    energy_output->set_unit(std::move(energy_unit));
+    energy_output->explicit_gradients = {};
+    energy_output->per_atom = false;
+
+    // Evaluation options: lengths use the given length unit, and only the
+    // "energy" output defined above is requested.
+    this->evaluation_options = torch::make_intrusive<metatensor_torch::ModelEvaluationOptionsHolder>();
+    this->evaluation_options->set_length_unit(std::move(length_unit));
+    this->evaluation_options->outputs.insert("energy", energy_output);
+}
+
+void PairMetatensorDataKokkos::load_model(
+    LAMMPS* lmp,
+    const char* path,
+    const char* extensions_directory
+) {  // load the model at `path`, require an "energy" output, then print its metadata where comm->me == 0
+    // TODO: search for the model & extensions inside `$LAMMPS_POTENTIALS`?
+
+    if (this->model != nullptr) {  // a second call is an error
+        lmp->error->all(FLERR, "torch model is already loaded");
+    }
+
+    torch::optional<std::string> extensions = torch::nullopt;  // stays empty when no directory is given
+    if (extensions_directory != nullptr) {
+        extensions = std::string(extensions_directory);
+    }
+
+    try {
+        this->model = std::make_unique<torch::jit::Module>(
+            metatensor_torch::load_atomistic_model(path, extensions)
+        );
+    } catch (const c10::Error& e) {  // only c10::Error is turned into a LAMMPS error here
+        lmp->error->all(FLERR, "failed to load metatensor model at '{}': {}", path, e.what());
+    }
+
+    auto capabilities_ivalue = this->model->run_method("capabilities");  // not wrapped in the try block above
+    this->capabilities = capabilities_ivalue.toCustomClass<metatensor_torch::ModelCapabilitiesHolder>();
+
+    if (!this->capabilities->outputs().contains("energy")) {
+        lmp->error->all(FLERR, "the model at '{}' does not have an \"energy\" output, we can not use it in pair_style metatensor", path);
+    }
+
+    if (lmp->comm->me == 0) {  // print and register citations from a single process only
+        auto metadata_ivalue = this->model->run_method("metadata");
+        auto metadata = metadata_ivalue.toCustomClass<metatensor_torch::ModelMetadataHolder>();
+        auto to_print = metadata->print();
+
+        if (lmp->screen) {  // screen and logfile are each skipped when null
+            fprintf(lmp->screen, "\n%s\n", to_print.c_str());
+        }
+        if (lmp->logfile) {
+            fprintf(lmp->logfile,"\n%s\n", to_print.c_str());
+        }
+
+        // add the model references to LAMMPS citation handling mechanism
+        for (const auto& it: metadata->references) {
+            for (const auto& ref: it.value()) {
+                lmp->citeme->add(ref + "\n");  // one citeme entry per reference string
+            }
+        }
+    }
+}
+
+
+/* ---------------------------------------------------------------------- */
+
+template<class LMPDeviceType>
+PairMetatensorKokkos<LMPDeviceType>::PairMetatensorKokkos(LAMMPS *lmp): Pair(lmp), type_mapping(nullptr) {
+    // the virial can not be computed as fdotr, since this might not be a
+    // pure pair potential
+    this->no_virial_fdotr_compute = 1;
+
+    // pick the unit names given to the model from the LAMMPS unit style
+    const std::string style = update->unit_style;
+    std::string length, energy;
+    if (style == "real") {
+        length = "angstrom";
+        energy = "kcal/mol";
+    } else if (style == "metal") {
+        length = "angstrom";
+        energy = "eV";
+    } else if (style == "si") {
+        length = "meter";
+        energy = "joule";
+    } else if (style == "electron") {
+        length = "Bohr";
+        energy = "Hartree";
+    } else {
+        error->all(FLERR, "unsupported units '{}' for pair metatensor ", update->unit_style);
+    }
+    this->mts_data = new PairMetatensorDataKokkos(std::move(length), std::move(energy));
+}
+
+template<class LMPDeviceType>
+PairMetatensorKokkos<LMPDeviceType>::~PairMetatensorKokkos() {
+    // the model data (including the system adaptor it owns) is released first
+    delete this->mts_data;
+
+    if (!allocated) return;
+    memory->destroy(setflag);
+    memory->destroy(cutsq);
+    memory->destroy(type_mapping);
+}
+
+// called when finding `pair_style metatensor` in the input
+template<class LMPDeviceType>
+void PairMetatensorKokkos<LMPDeviceType>::settings(int argc, char ** argv) {
+    // Arguments: <model path> [check_consistency on|off] [extensions <dir>]
+    // [device <name>]. Loads the model, then picks the torch device to run on.
+    if (argc == 0) {
+        error->all(FLERR, "expected at least 1 argument to pair_style metatensor, got {}", argc);
+    }
+
+    const char* model_path = argv[0];
+    const char* extensions_directory = nullptr;
+    const char* requested_device = nullptr;
+    for (int i=1; i<argc; i++) {
+        if (strcmp(argv[i], "check_consistency") == 0) {
+            if (i == argc - 1) {
+                error->all(FLERR, "expected <on/off> after 'check_consistency' in pair_style metatensor, got nothing");
+            } else if (strcmp(argv[i + 1], "on") == 0) {
+                mts_data->check_consistency = true;
+            } else if (strcmp(argv[i + 1], "off") == 0) {
+                mts_data->check_consistency = false;
+            } else {
+                error->all(FLERR, "expected <on/off> after 'check_consistency' in pair_style metatensor, got '{}'", argv[i + 1]);
+            }
+
+            i += 1;
+        } else if (strcmp(argv[i], "extensions") == 0) {
+            if (i == argc - 1) {
+                error->all(FLERR, "expected <path> after 'extensions' in pair_style metatensor, got nothing");
+            }
+            extensions_directory = argv[i + 1];
+            i += 1;
+        } else if (strcmp(argv[i], "device") == 0) {
+            if (i == argc - 1) {
+                error->all(FLERR, "expected string after 'device' in pair_style metatensor, got nothing");
+            }
+            requested_device = argv[i + 1];
+            i += 1;
+        } else {
+            error->all(FLERR, "unexpected argument to pair_style metatensor: '{}'", argv[i]);
+        }
+    }
+
+    mts_data->load_model(this->lmp, model_path, extensions_directory);
+
+    // Select the device to use based on the model's preference, the user choice
+    // and what's available.
+    auto available_devices = std::vector<torch::Device>();
+    for (const auto& device: mts_data->capabilities->supported_devices) {
+        if (device == "cpu") {
+            available_devices.push_back(torch::kCPU);
+        } else if (device == "cuda") {
+            if (torch::cuda::is_available()) {
+                // Get a MPI communicator for all processes on the current node
+                MPI_Comm local;
+                MPI_Comm_split_type(world, MPI_COMM_TYPE_SHARED, 0, MPI_INFO_NULL, &local);
+                // Get the rank of this MPI process on the current node
+                int local_rank;
+                MPI_Comm_rank(local, &local_rank);
+                int size;
+                MPI_Comm_size(local, &size);
+                MPI_Comm_free(&local);  // release the communicator, it is not needed anymore
+                if (size < torch::cuda::device_count()) {
+                    if (comm->me == 0) {
+                        error->warning(FLERR,
+                            "found {} CUDA-capable GPUs, but only {} MPI processes on the current node; the remaining GPUs will not be used",
+                            torch::cuda::device_count(), size
+                        );
+                    }
+                }
+
+                // split GPUs between node-local processes using round-robin allocation
+                int gpu_to_use = local_rank % torch::cuda::device_count();
+                available_devices.push_back(torch::Device(torch::kCUDA, gpu_to_use));
+            }
+        } else if (device == "mps") {
+            #if TORCH_VERSION_MAJOR >= 2
+            if (torch::mps::is_available()) {
+                available_devices.push_back(torch::Device("mps"));
+            }
+            #endif
+        } else {
+            error->warning(FLERR,
+                "the model declared support for unknown device '{}', it will be ignored", device
+            );
+        }
+    }
+
+    if (available_devices.empty()) {
+        error->all(FLERR,
+            "failed to find a valid device for the model at '{}': "
+            "the model supports {}, none of these were available",
+            model_path, torch::str(mts_data->capabilities->supported_devices)
+        );
+    }
+
+    if (requested_device == nullptr) {
+        // no user request, pick the device the model prefers
+        mts_data->device = available_devices[0];
+    } else {
+        bool found_requested_device = false;
+        for (const auto& device: available_devices) {
+            if (device.is_cpu() && strcmp(requested_device, "cpu") == 0) {
+                mts_data->device = device;
+                found_requested_device = true;
+                break;
+            } else if (device.is_cuda() && strcmp(requested_device, "cuda") == 0) {
+                mts_data->device = device;
+                found_requested_device = true;
+                break;
+            } else if (device.is_mps() && strcmp(requested_device, "mps") == 0) {
+                mts_data->device = device;
+                found_requested_device = true;
+                break;
+            }
+        }
+
+        if (!found_requested_device) {
+            error->all(FLERR,
+                "failed to find requested device ({}): it is either "
+                "not supported by this model or not available on this machine",
+                requested_device
+            );
+        }
+    }
+
+    mts_data->model->to(mts_data->device);
+
+    // Handle potential mismatch between Kokkos and model devices
+    if (std::is_same<LMPDeviceType, Kokkos::Cuda>::value) {
+        if (!mts_data->device.is_cuda()) {
+            error->all(FLERR, "Kokkos is running on a GPU, but the model is not on a GPU");
+        }
+    } else {
+        if (!mts_data->device.is_cpu()) {
+            error->all(FLERR, "Kokkos is running on the host, but the model is not on CPU");
+        }
+    }
+
+    auto message = "Running simulation on " + mts_data->device.str() + " device with " + mts_data->capabilities->dtype() + " data";
+    if (screen) {
+        fprintf(screen, "%s\n", message.c_str());
+    }
+    if (logfile) {
+        fprintf(logfile,"%s\n", message.c_str());
+    }
+
+    // create the per-type arrays (setflag, cutsq, type_mapping) the first time
+    // this function runs
+    if (!allocated) {
+        allocate();
+    }
+}
+
+
+template<class LMPDeviceType>
+void PairMetatensorKokkos<LMPDeviceType>::allocate() {
+    // Create the per-type arrays used by this pair style. They are released
+    // by the destructor, which checks `allocated` before doing so.
+    allocated = 1;
+
+    // setflags stores whether the coeff for a given pair of atom types are known
+    /// This could become a Kokkos view, but it is not clear how it is used,
+    /// so it stays a plain host array for now
+    setflag = memory->create(
+        setflag,
+        atom->ntypes + 1,
+        atom->ntypes + 1,
+        "pair:setflag"
+    );
+
+    for (int i = 1; i <= atom->ntypes; i++) {
+        for (int j = i; j <= atom->ntypes; j++) {
+            setflag[i][j] = 0;
+        }
+    }
+
+    /// NOTE: cutsq does not seem to be used by this pair style; it is still
+    /// allocated here and released in the destructor
+
+    // cutsq stores the squared cutoff for each pair
+    cutsq = memory->create(
+        cutsq,
+        atom->ntypes + 1,
+        atom->ntypes + 1,
+        "pair:cutsq"
+    );
+
+    // type_mapping stores the mapping from lammps atom types to
+    // the metatensor model species
+    /// This will stay non-kokkos for now (only used at initialization)
+    type_mapping = memory->create(
+        type_mapping,
+        atom->ntypes + 1,
+        "PairMetatensor:type_mapping"
+    );
+    // entry 0 is initialized as well: init_style() copies the whole array
+    for (int i = 0; i <= atom->ntypes; i++) {
+        type_mapping[i] = -1;
+    }
+}
+
+template<class LMPDeviceType>
+double PairMetatensorKokkos<LMPDeviceType>::init_one(int, int) {
+    // same value for every pair of types: the interaction range set in init_style()
+    return mts_data->interaction_range;
+}
+
+
+// called on pair_coeff
+template<class LMPDeviceType>
+void PairMetatensorKokkos<LMPDeviceType>::coeff(int argc, char ** argv) {
+    // expects `pair_coeff * * <types>`, with one metatensor type per LAMMPS atom type
+    if (argc < 3 || strcmp(argv[0], "*") != 0 || strcmp(argv[1], "*") != 0) {
+        error->all(FLERR, "invalid pair_coeff, expected `pair_coeff * * <list of types>`");
+    }
+
+    if (atom->ntypes != argc - 2) {
+        error->all(FLERR,
+            "invalid pair_coeff, expected `pair_coeff * * <list of types>` with {} types",
+            atom->ntypes
+        );
+    }
+
+    for (int lammps_type=1; lammps_type<argc - 1; lammps_type++) {
+        int type = utils::inumeric(FLERR, argv[lammps_type + 1], true, lmp);
+        type_mapping[lammps_type] = type;
+    }
+
+    // mark all pairs coeffs as known
+    for (int i = 1; i <= atom->ntypes; i++) {
+        for (int j = 1; j <= atom->ntypes; j++) {
+            setflag[i][j] = 1;
+            setflag[j][i] = 1;
+        }
+    }
+}
+
+
+// called when the run starts
+template<class LMPDeviceType>
+void PairMetatensorKokkos<LMPDeviceType>::init_style() {
+    // Checks the settings, then creates the system adaptor and the neighbor list requests.
+
+    // Require newton pair on since we need to communicate forces accumulated on
+    // ghost atoms to neighboring domains. These forces contributions come from
+    // gradient of a local descriptor w.r.t. domain ghosts (periodic images
+    // ghosts are handled separately).
+    /// Would be good if we could change this because Newton off is the Kokkos default
+    if (force->newton_pair != 1) {
+        error->all(FLERR, "Pair style metatensor requires newton pair on");
+    }
+
+    // get the model's interaction range
+    auto range = mts_data->capabilities->engine_interaction_range(mts_data->evaluation_options->length_unit());
+    if (range < 0) {
+        error->all(FLERR, "interaction_range is negative for this model");
+    } else if (!std::isfinite(range)) {
+        error->all(FLERR, "interaction_range is infinite for this model, this is not yet supported");
+    } else {
+        mts_data->interaction_range = range;
+    }
+
+    /// create a Kokkos view for type_mapping, filled through a host mirror view
+    Kokkos::View<int32_t*, Kokkos::LayoutRight, LMPDeviceType> type_mapping_kokkos("type_mapping", atom->ntypes + 1);
+    auto type_mapping_kokkos_host = Kokkos::create_mirror_view(type_mapping_kokkos);
+    for (int i = 0; i < atom->ntypes + 1; i++) {
+        type_mapping_kokkos_host(i) = type_mapping[i];
+    }
+    Kokkos::deep_copy(type_mapping_kokkos, type_mapping_kokkos_host);
+
+    // create system adaptor
+    auto options = MetatensorSystemOptionsKokkos<LMPDeviceType>{
+        this->type_mapping,
+        type_mapping_kokkos,
+        mts_data->interaction_range,
+        mts_data->check_consistency,
+    };
+    mts_data->system_adaptor = std::make_unique<MetatensorSystemAdaptorKokkos<LMPDeviceType>>(lmp, this, options);
+
+    // Translate from the metatensor neighbor lists requests to LAMMPS neighbor
+    // lists requests.
+    auto requested_nl = mts_data->model->run_method("requested_neighbor_lists");
+    for (const auto& ivalue: requested_nl.toList()) {
+        auto nl_options = ivalue.get().toCustomClass<metatensor_torch::NeighborListOptionsHolder>();
+        auto cutoff = nl_options->engine_cutoff(mts_data->evaluation_options->length_unit());
+
+        mts_data->system_adaptor->add_nl_request(cutoff, nl_options);
+    }
+}
+
+
+template<class LMPDeviceType>
+void PairMetatensorKokkos<LMPDeviceType>::init_list(int id, NeighList *ptr) {
+    // hand the neighbor list given by LAMMPS for request `id` over to the
+    // system adaptor, which received the requests in init_style()
+    mts_data->system_adaptor->init_list(id, ptr);
+}
+
+
+template<class LMPDeviceType>
+void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
+    // Evaluate the model for the current state of the simulation:
+    //  1. sync the atom data to this execution space and decode eflag/vflag
+    //     (the usual LAMMPS energy/virial flags) with ev_setup();
+    //  2. build the metatensor System and run the model on the local atoms;
+    //  3. accumulate the energy (global, and per-atom if requested);
+    //  4. run backward propagation: the gradient w.r.t. positions is added to
+    //     the forces, the gradient w.r.t. strain to the global virial.
+    //
+    // Per-atom virial is not implemented and raises an error.
+    // NOTE(review): the code below assumes float64 gradients -- confirm for float32 models
+
+    /// Declare what we need to read from the atomKK object and what we will modify
+    atomKK->sync(ExecutionSpaceFromDevice<LMPDeviceType>::space, X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK);
+    this->atomKK->modified(ExecutionSpaceFromDevice<LMPDeviceType>::space, ENERGY_MASK | F_MASK | VIRIAL_MASK);
+
+    if (eflag || vflag) {
+        ev_setup(eflag, vflag);
+    } else {
+        evflag = vflag_fdotr = eflag_global = eflag_atom = 0;
+    }
+
+    // only ask the model for per-atom energies when LAMMPS requests them
+    mts_data->evaluation_options->outputs.at("energy")->per_atom = (eflag_atom != 0);
+
+    auto dtype = torch::kFloat64;
+    if (mts_data->capabilities->dtype() == "float64") {
+        dtype = torch::kFloat64;
+    } else if (mts_data->capabilities->dtype() == "float32") {
+        dtype = torch::kFloat32;
+    } else {
+        error->all(FLERR, "the model requested an unsupported dtype '{}'", mts_data->capabilities->dtype());
+    }
+
+    // transform from LAMMPS to metatensor System
+    auto system = mts_data->system_adaptor->system_from_lmp(
+        static_cast<bool>(vflag_global), dtype, mts_data->device
+    );
+
+    // only run the calculation for atoms actually in the current domain
+    mts_data->selected_atoms_values.resize_({atom->nlocal, 2});
+    for (int i=0; i<atom->nlocal; i++) {
+        mts_data->selected_atoms_values[i][0] = 0;
+        mts_data->selected_atoms_values[i][1] = i;
+    }
+    auto selected_atoms = torch::make_intrusive<metatensor_torch::LabelsHolder>(
+        std::vector<std::string>{"system", "atom"}, mts_data->selected_atoms_values
+    );
+    mts_data->evaluation_options->set_selected_atoms(selected_atoms->to(mts_data->device));
+
+    torch::IValue result_ivalue;
+    try {
+        result_ivalue = mts_data->model->forward({
+            std::vector<metatensor_torch::System>{system},
+            mts_data->evaluation_options,
+            mts_data->check_consistency
+        });
+    } catch (const std::exception& e) {
+        error->all(FLERR, "error evaluating the torch model: {}", e.what());
+    }
+
+    auto result = result_ivalue.toGenericDict();
+    auto energy = result.at("energy").toCustomClass<metatensor_torch::TensorMapHolder>();
+    auto energy_tensor = metatensor_torch::TensorMapHolder::block_by_id(energy, 0)->values();
+    auto energy_detached = energy_tensor.detach().to(torch::kCPU).to(torch::kFloat64);
+
+    // store the energy returned by the model
+    torch::Tensor global_energy;
+    if (eflag_atom) {
+        auto energies = energy_detached.accessor<double, 2>();
+        // the model is only asked for the selected (local) atoms, so iterate
+        // over the rows actually returned instead of nlocal + nghost
+        for (int64_t i=0; i<energies.size(0); i++) {
+            // TODO: handle out of order samples
+            eatom[i] += energies[i][0];
+        }
+        global_energy = energy_detached.sum(0);
+        assert(global_energy.sizes() == std::vector<int64_t>({1}));
+    } else {
+        assert(energy_detached.sizes() == std::vector<int64_t>({1, 1}));
+        global_energy = energy_detached.reshape({1});
+    }
+
+    if (eflag_global) {
+        eng_vdwl += global_energy.item<double>();
+    }
+
+    // reset gradients to zero before calling backward
+    mts_data->system_adaptor->positions.mutable_grad() = torch::Tensor();
+    mts_data->system_adaptor->strain.mutable_grad() = torch::Tensor();
+
+    // compute forces/virial with backward propagation
+    energy_tensor.backward(-torch::ones_like(energy_tensor));
+    // keep the contiguous tensor alive in a named variable: the unmanaged
+    // view created below only borrows its memory
+    auto forces_tensor = mts_data->system_adaptor->positions.grad().contiguous();
+    assert(forces_tensor.scalar_type() == torch::kFloat64);
+
+    auto forces_lammps_kokkos = this->atomKK->k_f. template view<LMPDeviceType>();
+    /// Is it possible to do double*[3] here?
+    auto forces_metatensor_kokkos = Kokkos::View<double**, Kokkos::LayoutRight, LMPDeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged>>(forces_tensor.data_ptr<double>(), atom->nlocal + atom->nghost, 3);
+
+    Kokkos::parallel_for("PairMetatensorKokkos::compute::force_accumulation", atom->nlocal + atom->nghost, KOKKOS_LAMBDA(const int i) {
+        forces_lammps_kokkos(i, 0) += forces_metatensor_kokkos(i, 0);
+        forces_lammps_kokkos(i, 1) += forces_metatensor_kokkos(i, 1);
+        forces_lammps_kokkos(i, 2) += forces_metatensor_kokkos(i, 2);
+    });
+
+    assert(!vflag_fdotr);
+
+    if (vflag_global) {
+        auto virial_tensor = mts_data->system_adaptor->strain.grad();
+        assert(virial_tensor.scalar_type() == torch::kFloat64);
+
+        // apparently the cell is not supported in Kokkos format,
+        // so it has to be updated on CPU (??)
+        auto predicted_virial_tensor_cpu = virial_tensor.cpu();
+        auto predicted_virial = predicted_virial_tensor_cpu.accessor<double, 2>();
+
+        virial[0] += predicted_virial[0][0];
+        virial[1] += predicted_virial[1][1];
+        virial[2] += predicted_virial[2][2];
+
+        virial[3] += 0.5 * (predicted_virial[1][0] + predicted_virial[0][1]);
+        virial[4] += 0.5 * (predicted_virial[2][0] + predicted_virial[0][2]);
+        virial[5] += 0.5 * (predicted_virial[2][1] + predicted_virial[1][2]);
+    }
+
+    if (vflag_atom) {
+        error->all(FLERR, "per atom virial is not implemented");
+    }
+}
+
+namespace LAMMPS_NS {
+template class PairMetatensorKokkos<LMPDeviceType>;
+/// Only this instantiation exists for now. TODO: Host version
+}
diff --git a/src/KOKKOS/pair_metatensor_kokkos.h b/src/KOKKOS/pair_metatensor_kokkos.h
new file mode 100644
index 00000000000..8f5f144cec5
--- /dev/null
+++ b/src/KOKKOS/pair_metatensor_kokkos.h
@@ -0,0 +1,59 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   https://www.lammps.org/, Sandia National Laboratories
+   LAMMPS Development team: developers@lammps.org
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+#ifdef PAIR_CLASS
+// clang-format off
+PairStyle(metatensor/kk, PairMetatensorKokkos<LMPDeviceType>);
+// clang-format on
+#else
+
+#ifndef LMP_PAIR_METATENSOR_KOKKOS_H
+#define LMP_PAIR_METATENSOR_KOKKOS_H
+
+#include "kokkos_base.h"
+#include "pair_kokkos.h"
+
+namespace LAMMPS_NS {
+
+template<class LMPDeviceType>
+class MetatensorSystemAdaptorKokkos;
+
+struct PairMetatensorDataKokkos;
+
+/// Kokkos implementation of `pair_style metatensor` (`metatensor/kk`).
+/// Unlike most other Kokkos styles, it does not inherit from its non-Kokkos
+/// counterpart for now, since it would end up overriding everything anyway.
+template<class LMPDeviceType>
+class PairMetatensorKokkos : public Pair, public KokkosBase {
+public:
+    PairMetatensorKokkos(class LAMMPS *);
+    ~PairMetatensorKokkos();
+    // pair style interface, called by LAMMPS
+    void compute(int, int) override;
+    void settings(int, char **) override;
+    void coeff(int, char **) override;
+    void init_style() override;
+    double init_one(int, int) override;
+    void init_list(int id, NeighList *ptr) override;
+    // allocates setflag, cutsq and type_mapping
+    void allocate();
+private:
+    // model, evaluation options and system adaptor; deleted in the destructor
+    PairMetatensorDataKokkos* mts_data;
+    // mapping from LAMMPS types to metatensor types
+    int32_t* type_mapping;
+};
+
+}    // namespace LAMMPS_NS
+
+#endif
+#endif
diff --git a/src/ML-METATENSOR/pair_metatensor.cpp b/src/ML-METATENSOR/pair_metatensor.cpp
index 2f606b45525..6777afc5cb6 100644
--- a/src/ML-METATENSOR/pair_metatensor.cpp
+++ b/src/ML-METATENSOR/pair_metatensor.cpp
@@ -518,7 +518,7 @@ void PairMetatensor::compute(int eflag, int vflag) {
         auto samples_values = energy_samples->values().to(torch::kCPU);
         auto samples = samples_values.accessor<int32_t, 2>();
 
-        int64_t n_atoms = atom->nlocal + atom->nghost;
+        // int64_t n_atoms = atom->nlocal + atom->nghost;
         assert(samples_values.sizes() == mts_data->selected_atoms_values.sizes());
 
         auto energies = energy_detached.accessor<double, 2>();

From f4fe5ad2604fd407dcfd9077fdad775d28ab5275 Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Tue, 16 Jul 2024 12:59:05 +0200
Subject: [PATCH 02/15] Small GPU fixes

---
 examples/PACKAGES/metatensor/in.metatensor | 4 ++--
 src/KOKKOS/metatensor_system_kokkos.cpp    | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/examples/PACKAGES/metatensor/in.metatensor b/examples/PACKAGES/metatensor/in.metatensor
index 59a32c89e4a..708f852f88c 100644
--- a/examples/PACKAGES/metatensor/in.metatensor
+++ b/examples/PACKAGES/metatensor/in.metatensor
@@ -17,9 +17,9 @@ pair_style metatensor nickel-lj.pt
 pair_coeff * * 28
 
 timestep 0.001
-fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0
+fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(100 * dt)
 
-thermo 10
+thermo 1
 thermo_style custom step temp pe etotal press vol
 
 # dump 1 all atom 10 dump.metatensor
diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index e4fc076e04f..91e37324d88 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -211,13 +211,15 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors(metatensor_to
     auto centers_tensor = torch::from_blob(
         centers.data(),
         {total_number_of_pairs},
-        torch::TensorOptions().dtype(torch::kInt32).device(device)
+        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
     );
+    centers_tensor = centers_tensor.to(device);
     auto neighbors_tensor = torch::from_blob(
         neighbors.data(),
         {total_number_of_pairs},
-        torch::TensorOptions().dtype(torch::kInt32).device(device)
+        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
     );
+    neighbors_tensor = neighbors_tensor.to(device);
 
     // change centers and neighbors to the original atom ids
     auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor);

From 1699bfedf1468d2c0a091f3999cd1de6a61cbbd4 Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Mon, 21 Oct 2024 17:14:28 +0200
Subject: [PATCH 03/15] Profile and fix speed issues

---
 examples/PACKAGES/metatensor/in.kokkos.metatensor | 10 +++++-----
 examples/PACKAGES/metatensor/in.metatensor        |  6 +++---
 examples/PACKAGES/metatensor/readme.txt           |  6 +++---
 src/KOKKOS/metatensor_system_kokkos.cpp           |  8 +++++---
 src/KOKKOS/pair_metatensor_kokkos.cpp             |  3 ++-
 5 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/examples/PACKAGES/metatensor/in.kokkos.metatensor b/examples/PACKAGES/metatensor/in.kokkos.metatensor
index 39c8ae38cd2..39a1cf644b0 100644
--- a/examples/PACKAGES/metatensor/in.kokkos.metatensor
+++ b/examples/PACKAGES/metatensor/in.kokkos.metatensor
@@ -3,7 +3,7 @@ boundary p p p
 
 atom_style atomic/kk
 lattice fcc 3.6
-region box block 0 2 0 2 0 2
+region box block 0 8 0 8 0 8
 create_box 1 box
 create_atoms 1 box
 
@@ -14,15 +14,15 @@ velocity all create 123 42
 
 run_style verlet/kk
 
-pair_style metatensor/kk nickel-lj.pt device cuda
+pair_style metatensor/kk nickel-lj.pt device cuda check_consistency off
 pair_coeff * * 28
 
 timestep 0.001
 fix 1 all nve
 
-thermo 1
-thermo_style custom step temp pe etotal press vol
+thermo 100
+thermo_style custom step temp pe etotal press vol cpu
 
 # dump 1 all atom 10 dump.metatensor
 
-run 100
+run 1000
diff --git a/examples/PACKAGES/metatensor/in.metatensor b/examples/PACKAGES/metatensor/in.metatensor
index b2e971c188d..9b93563a5c9 100644
--- a/examples/PACKAGES/metatensor/in.metatensor
+++ b/examples/PACKAGES/metatensor/in.metatensor
@@ -19,9 +19,9 @@ pair_coeff * * 28
 timestep 0.001
 fix 1 all nve
 
-thermo 1
-thermo_style custom step temp pe etotal press vol
+thermo 100
+thermo_style custom step temp pe etotal press vol cpu
 
 # dump 1 all atom 10 dump.metatensor
 
-run 100
+run 1000
diff --git a/examples/PACKAGES/metatensor/readme.txt b/examples/PACKAGES/metatensor/readme.txt
index e853f85d828..a09a5131448 100644
--- a/examples/PACKAGES/metatensor/readme.txt
+++ b/examples/PACKAGES/metatensor/readme.txt
@@ -5,10 +5,10 @@ To be compiled as
 cmake ../cmake/ -DPKG_ML-METATENSOR=ON -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON
 
 Run the example with
-../../../build/lmp -k on g 1 -pk kokkos newton on -in in.metatensor_kokkos
+../../../build/lmp -k on g 1 -pk kokkos newton on -in in.kokkos.metatensor
 and compare its output with the non-kokkos interface
 ../../../build/lmp -in in.metatensor
 
 
-cmake ../cmake -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/home/filippo/code/virtualenvs/base/lib/python3.12/site-packages/torch/share/cmake/
-cmake ../cmake/ -DPKG_ML-METATENSOR=ON -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DCMAKE_PREFIX_PATH=/home/filippo/code/virtualenvs/base/lib/python3.12/site-packages/torch/share/cmake/
+cmake ../cmake -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../site-packages/torch/share/cmake/
+cmake ../cmake/ -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../libtorch/share/cmake/
diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index 254d93ae5ee..e5dc729de45 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -27,6 +27,9 @@
 #include "kokkos.h"
 #include "atom_kokkos.h"
 
+// #include <torch/cuda.h>
+// #include <chrono>
+
 #ifndef KOKKOS_ENABLE_CUDA
 namespace Kokkos {
 class Cuda {};
@@ -134,8 +137,7 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
     auto cell_inv_tensor = system->cell().inverse().t().to(device).to(torch::kFloat64);
     // it might be a good idea to have this as float32 if the model is using float32
     // to speed up the computation, especially on GPU
-
-
+    
     /*-------------- whatever, this will be done on CPU for now ------------------------*/
 
     // Collect the local atom id of all local & ghosts atoms, mapping ghosts
@@ -312,6 +314,7 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
         metatensor_torch::register_autograd_neighbors(system, neighbor_list, options_.check_consistency);
         system->add_neighbor_list(cache.options, neighbor_list);
     }
+
 }
 
 
@@ -515,7 +518,6 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos<LMPDeviceType>::system_fr
     torch::ScalarType dtype,
     torch::Device device
 ) {
-    // std::cout << "MetatensorSystemAdaptorKokkos::system_from_lmp" << std::endl;
     auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
 
     auto atom_types_lammps_kokkos = atomKK->k_type.view<LMPDeviceType>();
diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp
index 18cd91376ed..12322410764 100644
--- a/src/KOKKOS/pair_metatensor_kokkos.cpp
+++ b/src/KOKKOS/pair_metatensor_kokkos.cpp
@@ -549,7 +549,7 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
     // }
 
     /// Declare what we need to read from the atomKK object and what we will modify
-    atomKK->sync(ExecutionSpaceFromDevice<LMPDeviceType>::space, X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK);
+    this->atomKK->sync(ExecutionSpaceFromDevice<LMPDeviceType>::space, X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK);
     this->atomKK->modified(ExecutionSpaceFromDevice<LMPDeviceType>::space, ENERGY_MASK | F_MASK | VIRIAL_MASK);
 
     if (eflag || vflag) {
@@ -579,6 +579,7 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
     );
 
     // only run the calculation for atoms actually in the current domain
+    // TODO: port to Kokkos
     mts_data->selected_atoms_values.resize_({atom->nlocal, 2});
     for (int i=0; i<atom->nlocal; i++) {
         mts_data->selected_atoms_values[i][0] = 0;

From 8836c228ce0262a362061e11c2adbfb5c40145bb Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Mon, 21 Oct 2024 21:17:24 +0200
Subject: [PATCH 04/15] Fix dtypes

---
 src/KOKKOS/metatensor_system_kokkos.cpp | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index e5dc729de45..963ea7d5aeb 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -127,14 +127,13 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::add_nl_request(double cutoff,
 
 template<class LMPDeviceType>
 void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metatensor_torch::System& system) {
-    // std::cout << "MetatensorSystemAdaptorKokkos::setup_neighbors" << std::endl;
     auto dtype = system->positions().scalar_type();
     auto device = system->positions().device();
 
     auto positions_kokkos = this->atomKK->k_x. template view<LMPDeviceType>();
     auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
 
-    auto cell_inv_tensor = system->cell().inverse().t().to(device).to(torch::kFloat64);
+    auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype);
     // it might be a good idea to have this as float32 if the model is using float32
     // to speed up the computation, especially on GPU
     
@@ -232,7 +231,7 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
         positions_kokkos.data(),
         {total_n_atoms, 3},
         torch::TensorOptions().dtype(torch::kFloat64).device(device)
-    );
+    ).to(dtype);
 
     for (auto& cache: caches_) {
         // half list mask, if necessary (TODO: change names! This could modify the tensors outside the loop if more than one NL!)
@@ -320,14 +319,13 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
 
 template<class LMPDeviceType>
 void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_no_remap(metatensor_torch::System& system) {
-    // std::cout << "MetatensorSystemAdaptorKokkos::setup_neighbors" << std::endl;
     auto dtype = system->positions().scalar_type();
     auto device = system->positions().device();
 
     auto positions_kokkos = this->atomKK->k_x. template view<LMPDeviceType>();
     auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
 
-    auto cell_inv_tensor = system->cell().inverse().t().to(device).to(torch::kFloat64);
+    auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype);
     // it might be a good idea to have this as float32 if the model is using float32
     // to speed up the computation, especially on GPU
 
@@ -426,7 +424,7 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_no_remap(meta
         positions_kokkos.data(),
         {total_n_atoms, 3},
         torch::TensorOptions().dtype(torch::kFloat64).device(device)
-    );
+    ).to(dtype);
 
     for (auto& cache: caches_) {
         // half list mask, if necessary (TODO: change names! This could modify the tensors outside the loop if more than one NL!)
@@ -538,17 +536,17 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos<LMPDeviceType>::system_fr
         torch::TensorOptions().dtype(torch::kInt32).device(device)
     ).clone();  /// Again, allocation alert. Not sure if this can be avoided
 
-    auto tensor_options = torch::TensorOptions().dtype(torch::kFloat64).device(device);
-
     // atom->x contains "real" and then ghost atoms, in that order
     auto positions_kokkos = atomKK->k_x.view<LMPDeviceType>();
+    auto tensor_options_positions = torch::TensorOptions().dtype(torch::kFloat64).device(device);
     this->positions = torch::from_blob(
         positions_kokkos.data(), {total_n_atoms, 3},
         // requires_grad=true since we always need gradients w.r.t. positions
-        tensor_options
+        tensor_options_positions
     ).clone().requires_grad_(true);  /// Allocation alert (clone)
 
-    auto cell = torch::zeros({3, 3}, tensor_options);  /// Allocation alert, we could make it a class member and allocate it once
+    auto tensor_options_cell = torch::TensorOptions().dtype(dtype).device(device);
+    auto cell = torch::zeros({3, 3}, tensor_options_cell);  /// Allocation alert, we could make it a class member and allocate it once
     /// domain doesn't seem to have a Kokkos version
     cell[0][0] = domain->xprd;
 
@@ -560,7 +558,7 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos<LMPDeviceType>::system_fr
     cell[2][2] = domain->zprd;
     /// And the other elements? Are they always zero?
 
-    auto system_positions = this->positions;
+    auto system_positions = this->positions.to(dtype);
     cell = cell.to(dtype).to(device);   /// to(device) alert. How do we find the cell on Kokkos?
 
     if (do_virial) {

From ecf5b4af9461dfd65914598d94772e34130c6572 Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Tue, 22 Oct 2024 15:31:46 +0200
Subject: [PATCH 05/15] Use the Kokkos NL directly

---
 src/KOKKOS/metatensor_system_kokkos.cpp | 110 ++++++++++++++++--------
 src/KOKKOS/metatensor_system_kokkos.h   |   6 +-
 src/KOKKOS/pair_metatensor_kokkos.cpp   |  59 ++++++++-----
 3 files changed, 118 insertions(+), 57 deletions(-)

diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index 963ea7d5aeb..0a42a6eb017 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -27,8 +27,8 @@
 #include "kokkos.h"
 #include "atom_kokkos.h"
 
-// #include <torch/cuda.h>
-// #include <chrono>
+#include <torch/cuda.h>
+#include <chrono>
 
 #ifndef KOKKOS_ENABLE_CUDA
 namespace Kokkos {
@@ -61,6 +61,8 @@ MetatensorSystemAdaptorKokkos<LMPDeviceType>::MetatensorSystemAdaptorKokkos(LAMM
     auto request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
     request->set_id(0);
     request->set_cutoff(options_.interaction_range);
+    request->set_kokkos_host(0);
+    request->set_kokkos_device(1);
 
     this->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(device).requires_grad(true));
 }
@@ -127,6 +129,12 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::add_nl_request(double cutoff,
 
 template<class LMPDeviceType>
 void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metatensor_torch::System& system) {
+    // auto start = std::chrono::high_resolution_clock::now();
+    // auto end = std::chrono::high_resolution_clock::now();
+
+    // torch::cuda::synchronize();
+    // start = std::chrono::high_resolution_clock::now();
+
     auto dtype = system->positions().scalar_type();
     auto device = system->positions().device();
 
@@ -134,8 +142,6 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
     auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
 
     auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype);
-    // it might be a good idea to have this as float32 if the model is using float32
-    // to speed up the computation, especially on GPU
     
     /*-------------- whatever, this will be done on CPU for now ------------------------*/
 
@@ -188,39 +194,64 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
     );
     original_atom_id_tensor = original_atom_id_tensor.to(device);  // RIP
 
-    // Accumulate total number of pairs
-    int total_number_of_pairs = 0;
-    for (int ii=0; ii<(list_->inum + list_->gnum); ii++) {
-        total_number_of_pairs += list_->numneigh[ii];
-    }
-    std::vector<int> centers(total_number_of_pairs);
-    std::vector<int> neighbors(total_number_of_pairs);
+    // torch::cuda::synchronize();
+    // end = std::chrono::high_resolution_clock::now();
+    // std::cout << "  CPU packaging and GPU transfer (1st part): " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
 
-    // Fill the centers and neighbors arrays with the original atom ids
-    int pair_index = 0;
-    for (int ii=0; ii<(list_->inum + list_->gnum); ii++) {
-        auto atom_i = list_->ilist[ii];
-        auto neighbors_ii = list_->firstneigh[ii];
-        for (int jj=0; jj<list_->numneigh[ii]; jj++) {
-            centers[pair_index] = atom_i;
-            neighbors[pair_index] = neighbors_ii[jj] & NEIGHMASK;
-            pair_index++;
-        }
-    }
+    // torch::cuda::synchronize();
+    // start = std::chrono::high_resolution_clock::now();
 
-    // Create torch tensors for the centers and neighbors arrays
-    auto centers_tensor = torch::from_blob(
-        centers.data(),
-        {total_number_of_pairs},
-        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
+
+    NeighListKokkos<LMPDeviceType>* list_kk = static_cast<NeighListKokkos<LMPDeviceType>*>(this->list_);
+
+    auto numneigh_kk = list_kk->d_numneigh;
+    auto neighbors_kk = list_kk->d_neighbors;
+    auto ilist_kk = list_kk->d_ilist;
+
+    auto max_number_of_neighbors = list_kk->maxneighs;
+
+    // mask neighbors_kk with NEIGHMASK. We take this opportunity to set the
+    // layout of this view to LayoutRight, which we need to feed the pointer to torch
+    Kokkos::View<int**, Kokkos::LayoutRight, LMPDeviceType> neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors);
+    Kokkos::parallel_for("mask_neigh", total_n_atoms*max_number_of_neighbors, KOKKOS_LAMBDA(int i) {
+        auto local_i = i / max_number_of_neighbors;
+        auto local_j = i % max_number_of_neighbors;
+        neighbors_kk_masked(local_i, local_j) = neighbors_kk(local_i, local_j) & NEIGHMASK;
+    });
+
+    auto numneigh_torch = torch::from_blob(
+        numneigh_kk.data(),
+        {total_n_atoms},
+        torch::TensorOptions().dtype(torch::kInt32).device(device)
     );
-    centers_tensor = centers_tensor.to(device);
-    auto neighbors_tensor = torch::from_blob(
-        neighbors.data(),
-        {total_number_of_pairs},
-        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
+
+    auto neighbors_torch = torch::from_blob(
+        neighbors_kk_masked.data(),
+        {total_n_atoms, max_number_of_neighbors},
+        torch::TensorOptions().dtype(torch::kInt32).device(device)
     );
-    neighbors_tensor = neighbors_tensor.to(device);
+
+    auto ilist_torch = torch::from_blob(
+        ilist_kk.data(),
+        {total_n_atoms},
+        torch::TensorOptions().dtype(torch::kInt32).device(device)
+    );
+
+    auto expanded_arange = torch::arange(max_number_of_neighbors, torch::TensorOptions().dtype(torch::kInt32).device(device)).unsqueeze(0).expand({total_n_atoms, -1});
+    auto neighbor_2d_mask = expanded_arange < numneigh_torch.unsqueeze(1);
+
+    auto expanded_arange_other_dim = torch::arange(total_n_atoms, torch::TensorOptions().dtype(torch::kInt32).device(device)).unsqueeze(1).expand({-1, max_number_of_neighbors});
+    auto index_for_ilist = expanded_arange_other_dim.masked_select(neighbor_2d_mask);
+    auto centers_tensor = ilist_torch.index_select(0, index_for_ilist);
+
+    auto neighbors_tensor = neighbors_torch.masked_select(neighbor_2d_mask);
+
+    // torch::cuda::synchronize();
+    // end = std::chrono::high_resolution_clock::now();
+    // std::cout << "  CPU packaging and GPU transfer (2nd part): " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
+
+    // torch::cuda::synchronize();
+    // start = std::chrono::high_resolution_clock::now();
 
     // change centers and neighbors to the original atom ids
     auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor);
@@ -296,13 +327,24 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
         auto sample_indices = torch::empty(samples_values_unique.size(0), samples_inverse.options());
         sample_indices.scatter_(0, samples_inverse, permutation);
 
+        // torch::cuda::synchronize();
+        // end = std::chrono::high_resolution_clock::now();
+        // std::cout << "  filtering out stuff: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
+
+        // torch::cuda::synchronize();
+        // start = std::chrono::high_resolution_clock::now();
+
         auto samples = torch::make_intrusive<metatensor_torch::LabelsHolder>(
             std::vector<std::string>{"first_atom", "second_atom", "cell_shift_a", "cell_shift_b", "cell_shift_c"},
             samples_values_unique
         );
 
+        // torch::cuda::synchronize();
+        // end = std::chrono::high_resolution_clock::now();
+        // std::cout << "  Time to create big labels: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
+
         auto neighbor_list = torch::make_intrusive<metatensor_torch::TensorBlockHolder>(
-            interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1).to(dtype).to(device),
+            interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1),
             samples->to(device),
             std::vector<metatensor_torch::TorchLabels>{
                 metatensor_torch::LabelsHolder::create({"xyz"}, {{0}, {1}, {2}})->to(device),
diff --git a/src/KOKKOS/metatensor_system_kokkos.h b/src/KOKKOS/metatensor_system_kokkos.h
index c1c661edb60..fbd788235af 100644
--- a/src/KOKKOS/metatensor_system_kokkos.h
+++ b/src/KOKKOS/metatensor_system_kokkos.h
@@ -107,7 +107,10 @@ class MetatensorSystemAdaptorKokkos : public Pointers {
     // conversion) to access its gradient
     torch::Tensor positions;
 
-private:
+
+    // These two are not private otherwise Kokkos can't see the lambdas
+    // defined inside them
+
     // setup the metatensor neighbors list from the internal LAMMPS one,
     // remapping periodic ghosts to the corresponding local atom
     void setup_neighbors_remap(metatensor_torch::System& system);
@@ -118,6 +121,7 @@ class MetatensorSystemAdaptorKokkos : public Pointers {
     // This produces a larger NL but skips the cost of the remapping
     void setup_neighbors_no_remap(metatensor_torch::System& system);
 
+private:
     // options for this system adaptor
     MetatensorSystemOptionsKokkos<LMPDeviceType> options_;
 
diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp
index 12322410764..bb055f11cf8 100644
--- a/src/KOKKOS/pair_metatensor_kokkos.cpp
+++ b/src/KOKKOS/pair_metatensor_kokkos.cpp
@@ -47,6 +47,8 @@
 
 #include "metatensor_system_kokkos.h"
 
+#include <chrono>
+
 #ifndef KOKKOS_ENABLE_CUDA
 namespace Kokkos {
 class Cuda {};
@@ -77,8 +79,6 @@ struct LAMMPS_NS::PairMetatensorDataKokkos {
     // how far away the model needs to know about neighbors
     double max_cutoff;
 
-    // allocation cache for the selected atoms
-    torch::Tensor selected_atoms_values;
     // adaptor from LAMMPS system to metatensor's
     std::unique_ptr<MetatensorSystemAdaptorKokkos<LMPDeviceType>> system_adaptor;
 };
@@ -90,9 +90,6 @@ PairMetatensorDataKokkos::PairMetatensorDataKokkos(std::string length_unit, std:
     remap_pairs(true),
     max_cutoff(-1)
 {
-    auto options = torch::TensorOptions().dtype(torch::kInt32);
-    this->selected_atoms_values = torch::zeros({0, 2}, options);
-
     // default to true for now, this will be changed to false later
     this->check_consistency = true;
 
@@ -205,8 +202,6 @@ PairMetatensorKokkos<LMPDeviceType>::~PairMetatensorKokkos() {
 // called when finding `pair_style metatensor` in the input
 template<class LMPDeviceType>
 void PairMetatensorKokkos<LMPDeviceType>::settings(int argc, char ** argv) {
-    std::cout << "settings" << std::endl;
-
     if (argc == 0) {
         error->all(FLERR, "expected at least 1 argument to pair_style metatensor, got {}", argc);
     }
@@ -370,8 +365,6 @@ void PairMetatensorKokkos<LMPDeviceType>::settings(int argc, char ** argv) {
 
 template<class LMPDeviceType>
 void PairMetatensorKokkos<LMPDeviceType>::allocate() {
-    std::cout << "allocate" << std::endl;
-
     allocated = 1;
 
     // setflags stores whether the coeff for a given pair of atom types are known
@@ -417,7 +410,6 @@ void PairMetatensorKokkos<LMPDeviceType>::allocate() {
 
 template<class LMPDeviceType>
 double PairMetatensorKokkos<LMPDeviceType>::init_one(int, int) {
-    std::cout << "init_one" << std::endl;
     return mts_data->max_cutoff;
 }
 
@@ -425,7 +417,6 @@ double PairMetatensorKokkos<LMPDeviceType>::init_one(int, int) {
 // called on pair_coeff
 template<class LMPDeviceType>
 void PairMetatensorKokkos<LMPDeviceType>::coeff(int argc, char ** argv) {
-    std::cout << "coeff" << std::endl;
     if (argc < 3 || strcmp(argv[0], "*") != 0 || strcmp(argv[1], "*") != 0) {
         error->all(FLERR, "invalid pair_coeff, expected `pair_coeff * * <list of types>`");
     }
@@ -455,7 +446,6 @@ void PairMetatensorKokkos<LMPDeviceType>::coeff(int argc, char ** argv) {
 // called when the run starts
 template<class LMPDeviceType>
 void PairMetatensorKokkos<LMPDeviceType>::init_style() {
-    std::cout << "init_style" << std::endl;
     // Require newton pair on since we need to communicate forces accumulated on
     // ghost atoms to neighboring domains. These forces contributions come from
     // gradient of a local descriptor w.r.t. domain ghosts (periodic images
@@ -529,14 +519,15 @@ void PairMetatensorKokkos<LMPDeviceType>::init_style() {
 
 template<class LMPDeviceType>
 void PairMetatensorKokkos<LMPDeviceType>::init_list(int id, NeighList *ptr) {
-    std::cout << "init_list" << std::endl;
     mts_data->system_adaptor->init_list(id, ptr);
-    std::cout << "init_list done" << std::endl;
 }
 
 
 template<class LMPDeviceType>
 void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
+    // auto start = std::chrono::high_resolution_clock::now();
+    // auto end = std::chrono::high_resolution_clock::now();
+
     // auto x = atomKK->k_x.view<LMPDeviceType>();
     // auto h_array = Kokkos::create_mirror_view(d_array);
     // Kokkos::deep_copy(h_array, d_array);
@@ -573,24 +564,35 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
         error->all(FLERR, "the model requested an unsupported dtype '{}'", mts_data->capabilities->dtype());
     }
 
+    // torch::cuda::synchronize();
+    // start = std::chrono::high_resolution_clock::now();
+
     // transform from LAMMPS to metatensor System
     auto system = mts_data->system_adaptor->system_from_lmp(
         static_cast<bool>(vflag_global), mts_data->remap_pairs, dtype, mts_data->device
     );
 
+    // torch::cuda::synchronize();
+    // end = std::chrono::high_resolution_clock::now();
+    // std::cout << "sys-from-lmp: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << " ms" << std::endl;
+
     // only run the calculation for atoms actually in the current domain
-    // TODO: port to Kokkos
-    mts_data->selected_atoms_values.resize_({atom->nlocal, 2});
-    for (int i=0; i<atom->nlocal; i++) {
-        mts_data->selected_atoms_values[i][0] = 0;
-        mts_data->selected_atoms_values[i][1] = i;
-    }
+    auto tensor_options = torch::TensorOptions().dtype(torch::kInt32).device(mts_data->device);
+    torch::Tensor selected_atoms_values = torch::stack({
+        torch::zeros({atom->nlocal}, tensor_options),
+        torch::arange(atom->nlocal, tensor_options)
+    }, -1);
+
     auto selected_atoms = torch::make_intrusive<metatensor_torch::LabelsHolder>(
-        std::vector<std::string>{"system", "atom"}, mts_data->selected_atoms_values
+        std::vector<std::string>{"system", "atom"}, selected_atoms_values
     );
-    mts_data->evaluation_options->set_selected_atoms(selected_atoms->to(mts_data->device));
+    mts_data->evaluation_options->set_selected_atoms(selected_atoms);
 
     torch::IValue result_ivalue;
+
+    // torch::cuda::synchronize();
+    // start = std::chrono::high_resolution_clock::now();
+
     try {
         result_ivalue = mts_data->model->forward({
             std::vector<metatensor_torch::System>{system},
@@ -601,6 +603,10 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
         error->all(FLERR, "error evaluating the torch model: {}", e.what());
     }
 
+    // torch::cuda::synchronize();
+    // end = std::chrono::high_resolution_clock::now();
+    // std::cout << "Time taken forward: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << " ms" << std::endl;
+
     auto result = result_ivalue.toGenericDict();
     auto energy = result.at("energy").toCustomClass<metatensor_torch::TensorMapHolder>();
     auto energy_tensor = metatensor_torch::TensorMapHolder::block_by_id(energy, 0)->values();
@@ -631,7 +637,16 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
     mts_data->system_adaptor->strain.mutable_grad() = torch::Tensor();
 
     // compute forces/virial with backward propagation
+
+    // torch::cuda::synchronize();
+    // start = std::chrono::high_resolution_clock::now();
+
     energy_tensor.backward(-torch::ones_like(energy_tensor));
+
+    // torch::cuda::synchronize();
+    // end = std::chrono::high_resolution_clock::now();
+    // std::cout << "Time taken backward: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << " ms" << std::endl;
+
     auto forces_tensor = mts_data->system_adaptor->positions.grad();
     assert(forces_tensor.scalar_type() == torch::kFloat64);
 

From ea9a29fb1b75baecde5f2158c5326c92681a276c Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Wed, 23 Oct 2024 09:49:13 +0200
Subject: [PATCH 06/15] Use transposed kokkos NL for a better memory layout

---
 src/KOKKOS/metatensor_system_kokkos.cpp | 25 +++++++++++++------------
 src/KOKKOS/pair_metatensor_kokkos.cpp   |  3 +++
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index 0a42a6eb017..e96cbda17c8 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -132,18 +132,20 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
     // auto start = std::chrono::high_resolution_clock::now();
     // auto end = std::chrono::high_resolution_clock::now();
 
-    // torch::cuda::synchronize();
-    // start = std::chrono::high_resolution_clock::now();
-
     auto dtype = system->positions().scalar_type();
     auto device = system->positions().device();
 
     auto positions_kokkos = this->atomKK->k_x. template view<LMPDeviceType>();
     auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
 
-    auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype);
+    // torch::cuda::synchronize();
+    // start = std::chrono::high_resolution_clock::now();
     
     /*-------------- whatever, this will be done on CPU for now ------------------------*/
+    // The cost of this section seems to be very low
+
+    // There is no kokkos cell in LAMMPS, so we need to transfer
+    auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype);
 
     // Collect the local atom id of all local & ghosts atoms, mapping ghosts
     // atoms which are periodic images of local atoms back to the local atoms.
@@ -185,7 +187,6 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
             }
         }
     }
-    /*----------- end of whatever, this will be done on CPU for now --------------*/
 
     auto original_atom_id_tensor = torch::from_blob(
         original_atom_id_.data(),
@@ -196,22 +197,22 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
 
     // torch::cuda::synchronize();
     // end = std::chrono::high_resolution_clock::now();
-    // std::cout << "  CPU packaging and GPU transfer (1st part): " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
+    // std::cout << "  ghost mapping (CPU): " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
+
+    /*----------- end of whatever, this will be done on CPU for now --------------*/
 
     // torch::cuda::synchronize();
     // start = std::chrono::high_resolution_clock::now();
 
-
     NeighListKokkos<LMPDeviceType>* list_kk = static_cast<NeighListKokkos<LMPDeviceType>*>(this->list_);
 
     auto numneigh_kk = list_kk->d_numneigh;
-    auto neighbors_kk = list_kk->d_neighbors;
+    auto neighbors_kk = list_kk->d_neighbors_transpose;
     auto ilist_kk = list_kk->d_ilist;
 
     auto max_number_of_neighbors = list_kk->maxneighs;
 
-    // mask neighbors_kk with NEIGHMASK. We take this opportunity to set the
-    // layout of this view to LayoutRight, which we need to feed the pointer to torch
+    // mask neighbors_kk with NEIGHMASK
     Kokkos::View<int**, Kokkos::LayoutRight, LMPDeviceType> neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors);
     Kokkos::parallel_for("mask_neigh", total_n_atoms*max_number_of_neighbors, KOKKOS_LAMBDA(int i) {
         auto local_i = i / max_number_of_neighbors;
@@ -248,7 +249,7 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
 
     // torch::cuda::synchronize();
     // end = std::chrono::high_resolution_clock::now();
-    // std::cout << "  CPU packaging and GPU transfer (2nd part): " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
+    // std::cout << "  NL format conversion: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
 
     // torch::cuda::synchronize();
     // start = std::chrono::high_resolution_clock::now();
@@ -329,7 +330,7 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
 
         // torch::cuda::synchronize();
         // end = std::chrono::high_resolution_clock::now();
-        // std::cout << "  filtering out stuff: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
+        // std::cout << "  NL filtering: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
 
         // torch::cuda::synchronize();
         // start = std::chrono::high_resolution_clock::now();
diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp
index bb055f11cf8..5cabe04f376 100644
--- a/src/KOKKOS/pair_metatensor_kokkos.cpp
+++ b/src/KOKKOS/pair_metatensor_kokkos.cpp
@@ -359,6 +359,9 @@ void PairMetatensorKokkos<LMPDeviceType>::settings(int argc, char ** argv) {
         allocate();
     }
 
+    // this will allow us to receive the NL in a GPU-friendly format
+    this->lmp->kokkos->neigh_transpose = 1;
+
     std::cout << "Running on " << typeid(ExecutionSpaceFromDevice<LMPDeviceType>::space).name() << std::endl;
 }
 

From f0352aedcbe68139a6efbfe5aec871d56b360cdc Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Thu, 24 Oct 2024 13:45:22 +0200
Subject: [PATCH 07/15] Clean up

---
 doc/src/pair_metatensor.rst             |   2 +
 examples/PACKAGES/metatensor/readme.txt |  20 +-
 src/KOKKOS/metatensor_system_kokkos.cpp | 391 ++++++------------------
 src/KOKKOS/metatensor_system_kokkos.h   |  16 +-
 src/KOKKOS/pair_metatensor_kokkos.cpp   | 133 ++++----
 src/KOKKOS/pair_metatensor_kokkos.h     |  10 +-
 6 files changed, 161 insertions(+), 411 deletions(-)

diff --git a/doc/src/pair_metatensor.rst b/doc/src/pair_metatensor.rst
index aad21c0e84d..fa89291796c 100644
--- a/doc/src/pair_metatensor.rst
+++ b/doc/src/pair_metatensor.rst
@@ -3,6 +3,8 @@
 pair_style metatensor command
 =============================
 
+Accelerator Variants: *metatensor/kk*
+
 Syntax
 """"""
 
diff --git a/examples/PACKAGES/metatensor/readme.txt b/examples/PACKAGES/metatensor/readme.txt
index a09a5131448..c3c3df36ff5 100644
--- a/examples/PACKAGES/metatensor/readme.txt
+++ b/examples/PACKAGES/metatensor/readme.txt
@@ -1,14 +1,14 @@
-Design taken from pace_kokkos with accessory files in their own directory.
-Will probably need some cmake magic to copy them here from somewhere else.
+The base package can be compiled as
+cmake ../cmake -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../site-packages/torch/share/cmake/
+where /.../site-packages/torch/ is the path to a pip installation of torch
 
-To be compiled as
-cmake ../cmake/ -DPKG_ML-METATENSOR=ON -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON
+The kokkos version should be compiled as
+cmake ../cmake/ -DPKG_KOKKOS=ON -DKokkos_ENABLE_CUDA=ON -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../libtorch/share/cmake/
+where /.../libtorch/ is the path to a libtorch C++11 ABI distribution (which can be downloaded from https://pytorch.org/get-started/locally/).
+The OpenMP version (as opposed to the CUDA version) can be enabled with -DKokkos_ENABLE_OPENMP=ON instead of -DKokkos_ENABLE_CUDA=ON
 
-Run the example with
+The consistency between the two interfaces can be checked with
 ../../../build/lmp -k on g 1 -pk kokkos newton on -in in.kokkos.metatensor
-and compare its output with the non-kokkos interface
+(or `t Nt` instead of `g 1` for an OpenMP run with Nt threads)
+and the output can be compared with that of the plain metatensor interface
 ../../../build/lmp -in in.metatensor
-
-
-cmake ../cmake -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../site-packages/torch/share/cmake/
-cmake ../cmake/ -DPKG_KOKKOS=ON -DKokkos_ENABLE_OPENMP=ON -DKokkos_ENABLE_CUDA=ON -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../libtorch/share/cmake/
diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index e96cbda17c8..bbe7bfe98fd 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -31,6 +31,7 @@
 #include <chrono>
 
 #ifndef KOKKOS_ENABLE_CUDA
+// fake Kokkos::Cuda for non-CUDA builds
 namespace Kokkos {
 class Cuda {};
 } // namespace Kokkos
@@ -40,68 +41,39 @@ using namespace LAMMPS_NS;
 
 /* ---------------------------------------------------------------------- */
 
-template<class LMPDeviceType>
-MetatensorSystemAdaptorKokkos<LMPDeviceType>::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Pair* requestor, MetatensorSystemOptionsKokkos<LMPDeviceType> options):
+template<class DeviceType>
+MetatensorSystemAdaptorKokkos<DeviceType>::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Pair* requestor, MetatensorSystemOptionsKokkos<DeviceType> options):
     Pointers(lmp),
     list_(nullptr),
     options_(std::move(options)),
     caches_(),
     atomic_types_(torch::zeros({0}, torch::TensorOptions().dtype(torch::kInt32)))
 {
-    torch::Device device = torch::kCPU;
-    if (std::is_same<LMPDeviceType, Kokkos::Cuda>::value) {
-        device = torch::kCUDA;
-    } else {
-        device = torch::kCPU;
-    }
-
     // We ask LAMMPS for a full neighbor lists because we need to know about
     // ALL pairs, even if options->full_list() is false. We will then filter
     // the pairs to only include each pair once where needed.
     auto request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
     request->set_id(0);
     request->set_cutoff(options_.interaction_range);
-    request->set_kokkos_host(0);
-    request->set_kokkos_device(1);
-
-    this->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(device).requires_grad(true));
-}
-
-template<class LMPDeviceType>
-MetatensorSystemAdaptorKokkos<LMPDeviceType>::MetatensorSystemAdaptorKokkos(LAMMPS *lmp, Compute* requestor, MetatensorSystemOptionsKokkos<LMPDeviceType> options):
-    Pointers(lmp),
-    list_(nullptr),
-    options_(std::move(options)),
-    caches_(),
-    atomic_types_(torch::zeros({0}, torch::TensorOptions().dtype(torch::kInt32)))
-{
-    torch::Device device = torch::kCPU;
-    if (std::is_same<LMPDeviceType, Kokkos::Cuda>::value) {
-        device = torch::kCUDA;
-    } else {
-        device = torch::kCPU;
-    }
-
-    auto request = neighbor->add_request(requestor, NeighConst::REQ_FULL | NeighConst::REQ_GHOST);
-    request->set_id(0);
-    request->set_cutoff(options_.interaction_range);
-
-    this->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(device).requires_grad(true));
+    // set whether the kokkos NL should be calculated on host or device
+    request->set_kokkos_host(std::is_same_v<DeviceType,LMPHostType> &&
+                            !std::is_same_v<DeviceType,LMPDeviceType>);
+    request->set_kokkos_device(std::is_same_v<DeviceType,LMPDeviceType>);
 }
 
-template<class LMPDeviceType>
-MetatensorSystemAdaptorKokkos<LMPDeviceType>::~MetatensorSystemAdaptorKokkos() {
+template<class DeviceType>
+MetatensorSystemAdaptorKokkos<DeviceType>::~MetatensorSystemAdaptorKokkos() {
 
 }
 
-template<class LMPDeviceType>
-void MetatensorSystemAdaptorKokkos<LMPDeviceType>::init_list(int id, NeighList* ptr) {
+template<class DeviceType>
+void MetatensorSystemAdaptorKokkos<DeviceType>::init_list(int id, NeighList* ptr) {
     assert(id == 0);
     list_ = ptr;
 }
 
-template<class LMPDeviceType>
-void MetatensorSystemAdaptorKokkos<LMPDeviceType>::add_nl_request(double cutoff, metatensor_torch::NeighborListOptions request) {
+template<class DeviceType>
+void MetatensorSystemAdaptorKokkos<DeviceType>::add_nl_request(double cutoff, metatensor_torch::NeighborListOptions request) {
     if (cutoff > options_.interaction_range) {
         error->all(FLERR,
             "Invalid metatensor model: one of the requested neighbor lists "
@@ -127,26 +99,21 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::add_nl_request(double cutoff,
 }
 
 
-template<class LMPDeviceType>
-void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metatensor_torch::System& system) {
-    // auto start = std::chrono::high_resolution_clock::now();
-    // auto end = std::chrono::high_resolution_clock::now();
-
+template<class DeviceType>
+void MetatensorSystemAdaptorKokkos<DeviceType>::setup_neighbors_remap(metatensor_torch::System& system) {
     auto dtype = system->positions().scalar_type();
     auto device = system->positions().device();
 
-    auto positions_kokkos = this->atomKK->k_x. template view<LMPDeviceType>();
+    auto positions_kokkos = this->atomKK->k_x. template view<DeviceType>();
     auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
-
-    // torch::cuda::synchronize();
-    // start = std::chrono::high_resolution_clock::now();
     
-    /*-------------- whatever, this will be done on CPU for now ------------------------*/
-    // The cost of this section seems to be very low
-
+    /*-------------- this will be done on CPU for now ------------------------*/
     // There is no kokkos cell in LAMMPS, so we need to transfer
     auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype);
 
+    // The hashmap in the following code is not easy to implement in either Kokkos or torch
+    // The cost of this section seems to be very low anyway
+
     // Collect the local atom id of all local & ghosts atoms, mapping ghosts
     // atoms which are periodic images of local atoms back to the local atoms.
     //
@@ -195,49 +162,43 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
     );
     original_atom_id_tensor = original_atom_id_tensor.to(device);  // RIP
 
-    // torch::cuda::synchronize();
-    // end = std::chrono::high_resolution_clock::now();
-    // std::cout << "  ghost mapping (CPU): " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
-
-    /*----------- end of whatever, this will be done on CPU for now --------------*/
+    /*----------- end of "this will be done on CPU for now" --------------*/
 
-    // torch::cuda::synchronize();
-    // start = std::chrono::high_resolution_clock::now();
 
-    NeighListKokkos<LMPDeviceType>* list_kk = static_cast<NeighListKokkos<LMPDeviceType>*>(this->list_);
+    NeighListKokkos<DeviceType>* list_kk = static_cast<NeighListKokkos<DeviceType>*>(this->list_);
 
     auto numneigh_kk = list_kk->d_numneigh;
-    auto neighbors_kk = list_kk->d_neighbors_transpose;
+    auto neighbors_kk = list_kk->d_neighbors_transpose;  // transpose to have the same memory format as torch. This was requested in PairMetatensorKokkos::settings
     auto ilist_kk = list_kk->d_ilist;
 
     auto max_number_of_neighbors = list_kk->maxneighs;
 
-    // mask neighbors_kk with NEIGHMASK
-    Kokkos::View<int**, Kokkos::LayoutRight, LMPDeviceType> neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors);
+    // mask neighbors_kk with NEIGHMASK. Torch doesn't have this functionality, so we do it in Kokkos
+    Kokkos::View<int**, Kokkos::LayoutRight, DeviceType> neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors);
     Kokkos::parallel_for("mask_neigh", total_n_atoms*max_number_of_neighbors, KOKKOS_LAMBDA(int i) {
         auto local_i = i / max_number_of_neighbors;
         auto local_j = i % max_number_of_neighbors;
         neighbors_kk_masked(local_i, local_j) = neighbors_kk(local_i, local_j) & NEIGHMASK;
     });
 
+    // Convert NL-related data to torch tensors
     auto numneigh_torch = torch::from_blob(
         numneigh_kk.data(),
         {total_n_atoms},
         torch::TensorOptions().dtype(torch::kInt32).device(device)
     );
-
     auto neighbors_torch = torch::from_blob(
         neighbors_kk_masked.data(),
         {total_n_atoms, max_number_of_neighbors},
         torch::TensorOptions().dtype(torch::kInt32).device(device)
     );
-
     auto ilist_torch = torch::from_blob(
         ilist_kk.data(),
         {total_n_atoms},
         torch::TensorOptions().dtype(torch::kInt32).device(device)
     );
 
+    // convert from LAMMPS NL format to metatensor NL format
     auto expanded_arange = torch::arange(max_number_of_neighbors, torch::TensorOptions().dtype(torch::kInt32).device(device)).unsqueeze(0).expand({total_n_atoms, -1});
     auto neighbor_2d_mask = expanded_arange < numneigh_torch.unsqueeze(1);
 
@@ -247,42 +208,47 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
 
     auto neighbors_tensor = neighbors_torch.masked_select(neighbor_2d_mask);
 
-    // torch::cuda::synchronize();
-    // end = std::chrono::high_resolution_clock::now();
-    // std::cout << "  NL format conversion: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
-
-    // torch::cuda::synchronize();
-    // start = std::chrono::high_resolution_clock::now();
-
     // change centers and neighbors to the original atom ids
     auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor);
     auto neighbors_tensor_original_id = original_atom_id_tensor.index_select(0, neighbors_tensor);
 
-    // create torch tensor with the positions (TEMPORARY, TODO: change)
+    // create torch tensor with the positions
     auto positions_tensor = torch::from_blob(
         positions_kokkos.data(),
         {total_n_atoms, 3},
         torch::TensorOptions().dtype(torch::kFloat64).device(device)
     ).to(dtype);
 
+    // The following code is a direct translation of the code in the non-Kokkos version (MetatensorSystemAdaptor::setup_neighbors_remap),
+    // but rewritten with torch operations so that it can run on the GPU
     for (auto& cache: caches_) {
         // half list mask, if necessary (TODO: change names! This could modify the tensors outside the loop if more than one NL!)
         auto full_list = cache.options->full_list();
-        if (!full_list) {
+
+        torch::Tensor centers_tensor_original_id_full_or_half;
+        torch::Tensor neighbors_tensor_original_id_full_or_half;
+        torch::Tensor centers_tensor_full_or_half;
+        torch::Tensor neighbors_tensor_full_or_half;
+        if (full_list) {
+            centers_tensor_full_or_half = centers_tensor;
+            neighbors_tensor_full_or_half = neighbors_tensor;
+            centers_tensor_original_id_full_or_half = centers_tensor_original_id;
+            neighbors_tensor_original_id_full_or_half = neighbors_tensor_original_id;
+        } else {
             auto half_list_mask = centers_tensor_original_id <= neighbors_tensor_original_id;
-            centers_tensor = centers_tensor.masked_select(half_list_mask);
-            neighbors_tensor = neighbors_tensor.masked_select(half_list_mask);
-            centers_tensor_original_id = centers_tensor_original_id.masked_select(half_list_mask);
-            neighbors_tensor_original_id = neighbors_tensor_original_id.masked_select(half_list_mask);
+            centers_tensor_full_or_half = centers_tensor.masked_select(half_list_mask);
+            neighbors_tensor_full_or_half = neighbors_tensor.masked_select(half_list_mask);
+            centers_tensor_original_id_full_or_half = centers_tensor_original_id.masked_select(half_list_mask);
+            neighbors_tensor_original_id_full_or_half = neighbors_tensor_original_id.masked_select(half_list_mask);
         }
 
         // distance mask
-        auto interatomic_vectors = positions_tensor.index_select(0, neighbors_tensor) - positions_tensor.index_select(0, centers_tensor);
+        auto interatomic_vectors = positions_tensor.index_select(0, neighbors_tensor_full_or_half) - positions_tensor.index_select(0, centers_tensor_full_or_half);
         auto distance_mask = torch::sum(interatomic_vectors.pow(2), 1) < cache.cutoff*cache.cutoff;
 
         // index everything with the mask
-        auto centers_tensor_original_id_filtered = centers_tensor_original_id.masked_select(distance_mask);
-        auto neighbors_tensor_original_id_filtered = neighbors_tensor_original_id.masked_select(distance_mask);
+        auto centers_tensor_original_id_filtered = centers_tensor_original_id_full_or_half.masked_select(distance_mask);
+        auto neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_full_or_half.masked_select(distance_mask);
         auto interatomic_vectors_filtered = interatomic_vectors.index({distance_mask, torch::indexing::Slice()});
 
         // find filtered interatomic vectors using the original atoms
@@ -293,10 +259,19 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
         auto cell_shifts = pair_shifts.matmul(cell_inv_tensor);
         cell_shifts = torch::round(cell_shifts).to(torch::kInt32);
 
-        if (!full_list) {
+        torch::Tensor centers_tensor_original_id_filtered_full_or_half;
+        torch::Tensor neighbors_tensor_original_id_filtered_full_or_half;
+        torch::Tensor interatomic_vectors_filtered_full_or_half;
+        torch::Tensor cell_shifts_full_or_half;
+        if (full_list) {
+            centers_tensor_original_id_filtered_full_or_half = centers_tensor_original_id_filtered;
+            neighbors_tensor_original_id_filtered_full_or_half = neighbors_tensor_original_id_filtered;
+            interatomic_vectors_filtered_full_or_half = interatomic_vectors_filtered;
+            cell_shifts_full_or_half = cell_shifts;
+        } else {
             auto half_list_cell_mask = centers_tensor_original_id_filtered == neighbors_tensor_original_id_filtered;
             auto negative_half_space_mask = torch::sum(cell_shifts, 1) < 0;
-            // reproduce this mask with torch:
+            // reproduce this mask (from MetatensorSystemAdaptor::setup_neighbors_remap) with torch:
             // if ((shift[0] + shift[1] + shift[2] == 0) && (shift[2] < 0 || (shift[2] == 0 && shift[1] < 0)))
             auto edge_mask = (
                 torch::sum(cell_shifts, 1) == 0 & (
@@ -307,16 +282,14 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
                 )
             );
             auto final_mask = torch::logical_not(half_list_cell_mask & (negative_half_space_mask | edge_mask));
-            centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.masked_select(final_mask);
-            neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.masked_select(final_mask);
-            interatomic_vectors_filtered = interatomic_vectors_filtered.index({final_mask, torch::indexing::Slice()});
-            cell_shifts = cell_shifts.index({final_mask, torch::indexing::Slice()});
+            centers_tensor_original_id_filtered_full_or_half = centers_tensor_original_id_filtered.masked_select(final_mask);
+            neighbors_tensor_original_id_filtered_full_or_half = neighbors_tensor_original_id_filtered.masked_select(final_mask);
+            interatomic_vectors_filtered_full_or_half = interatomic_vectors_filtered.index({final_mask, torch::indexing::Slice()});
+            cell_shifts_full_or_half = cell_shifts.index({final_mask, torch::indexing::Slice()});
         }
 
-        centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.unsqueeze(-1);
-        neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.unsqueeze(-1);
-        auto samples_values = torch::concatenate({centers_tensor_original_id_filtered, neighbors_tensor_original_id_filtered, cell_shifts}, 1);
-
+        // make sure all the samples are unique
+        auto samples_values = torch::concatenate({centers_tensor_original_id_filtered_full_or_half.unsqueeze(-1), neighbors_tensor_original_id_filtered_full_or_half.unsqueeze(-1), cell_shifts_full_or_half}, 1);
         auto [samples_values_unique, samples_inverse, _] = torch::unique_dim(
             samples_values, /*dim=*/0, /*sorted=*/true, /*return_inverse=*/true, /*return_counts=*/false
         );
@@ -328,24 +301,14 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
         auto sample_indices = torch::empty(samples_values_unique.size(0), samples_inverse.options());
         sample_indices.scatter_(0, samples_inverse, permutation);
 
-        // torch::cuda::synchronize();
-        // end = std::chrono::high_resolution_clock::now();
-        // std::cout << "  NL filtering: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
-
-        // torch::cuda::synchronize();
-        // start = std::chrono::high_resolution_clock::now();
-
+        // wrap into metatensor data structures
         auto samples = torch::make_intrusive<metatensor_torch::LabelsHolder>(
             std::vector<std::string>{"first_atom", "second_atom", "cell_shift_a", "cell_shift_b", "cell_shift_c"},
             samples_values_unique
         );
 
-        // torch::cuda::synchronize();
-        // end = std::chrono::high_resolution_clock::now();
-        // std::cout << "  Time to create big labels: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << "ms" << std::endl;
-
         auto neighbor_list = torch::make_intrusive<metatensor_torch::TensorBlockHolder>(
-            interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1),
+            interatomic_vectors_filtered_full_or_half.index_select(0, sample_indices).unsqueeze(-1),
             samples->to(device),
             std::vector<metatensor_torch::TorchLabels>{
                 metatensor_torch::LabelsHolder::create({"xyz"}, {{0}, {1}, {2}})->to(device),
@@ -360,200 +323,14 @@ void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_remap(metaten
 }
 
 
-template<class LMPDeviceType>
-void MetatensorSystemAdaptorKokkos<LMPDeviceType>::setup_neighbors_no_remap(metatensor_torch::System& system) {
-    auto dtype = system->positions().scalar_type();
-    auto device = system->positions().device();
-
-    auto positions_kokkos = this->atomKK->k_x. template view<LMPDeviceType>();
-    auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
-
-    auto cell_inv_tensor = system->cell().inverse().t().to(device).to(dtype);
-    // it might be a good idea to have this as float32 if the model is using float32
-    // to speed up the computation, especially on GPU
-
-
-    /*-------------- whatever, this will be done on CPU for now ------------------------*/
-
-    // Collect the local atom id of all local & ghosts atoms, mapping ghosts
-    // atoms which are periodic images of local atoms back to the local atoms.
-    //
-    // Metatensor expects pairs corresponding to periodic atoms to be between
-    // the main atoms, but using the actual distance vector between the atom and
-    // the ghost.
-    original_atom_id_.clear();
-    original_atom_id_.reserve(total_n_atoms);
-
-    // identify all local atom by their LAMMPS atom tag.
-    local_atoms_tags_.clear();
-    for (int i=0; i<atom->nlocal; i++) {
-        original_atom_id_.emplace_back(i);
-        local_atoms_tags_.emplace(atom->tag[i], i);
-    }
-
-    // now loop over ghosts & map them back to the main cell if needed
-    ghost_atoms_tags_.clear();
-    for (int i=atom->nlocal; i<total_n_atoms; i++) {
-        auto tag = atom->tag[i];
-        auto it = local_atoms_tags_.find(tag);
-        if (it != local_atoms_tags_.end()) {
-            // this is the periodic image of an atom already owned by this domain
-            original_atom_id_.emplace_back(it->second);
-        } else {
-            // this can either be a periodic image of an atom owned by another
-            // domain, or directly an atom from another domain. Since we can not
-            // really distinguish between these, we take the first atom as the
-            // "main" one and remap all atoms with the same tag to the first one
-            auto it = ghost_atoms_tags_.find(tag);
-            if (it != ghost_atoms_tags_.end()) {
-                // we already found this atom elsewhere in the system
-                original_atom_id_.emplace_back(it->second);
-            } else {
-                // this is the first time we are seeing this atom
-                original_atom_id_.emplace_back(i);
-                ghost_atoms_tags_.emplace(tag, i);
-            }
-        }
-    }
-    /*----------- end of whatever, this will be done on CPU for now --------------*/
-
-    auto original_atom_id_tensor = torch::from_blob(
-        original_atom_id_.data(),
-        {total_n_atoms},
-        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
-    );
-    original_atom_id_tensor = original_atom_id_tensor.to(device);  // RIP
-
-    // Accumulate total number of pairs
-    int total_number_of_pairs = 0;
-    for (int ii=0; ii<(list_->inum + list_->gnum); ii++) {
-        total_number_of_pairs += list_->numneigh[ii];
-    }
-    std::vector<int> centers(total_number_of_pairs);
-    std::vector<int> neighbors(total_number_of_pairs);
-
-    // Fill the centers and neighbors arrays with the original atom ids
-    int pair_index = 0;
-    for (int ii=0; ii<(list_->inum + list_->gnum); ii++) {
-        auto atom_i = list_->ilist[ii];
-        auto neighbors_ii = list_->firstneigh[ii];
-        for (int jj=0; jj<list_->numneigh[ii]; jj++) {
-            centers[pair_index] = atom_i;
-            neighbors[pair_index] = neighbors_ii[jj] & NEIGHMASK;
-            pair_index++;
-        }
-    }
-
-    // Create torch tensors for the centers and neighbors arrays
-    auto centers_tensor = torch::from_blob(
-        centers.data(),
-        {total_number_of_pairs},
-        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
-    );
-    centers_tensor = centers_tensor.to(device);
-    auto neighbors_tensor = torch::from_blob(
-        neighbors.data(),
-        {total_number_of_pairs},
-        torch::TensorOptions().dtype(torch::kInt32).device(torch::kCPU)
-    );
-    neighbors_tensor = neighbors_tensor.to(device);
-
-    // change centers and neighbors to the original atom ids
-    auto centers_tensor_original_id = original_atom_id_tensor.index_select(0, centers_tensor);
-    auto neighbors_tensor_original_id = original_atom_id_tensor.index_select(0, neighbors_tensor);
-
-    // create torch tensor with the positions (TEMPORARY, TODO: change)
-    auto positions_tensor = torch::from_blob(
-        positions_kokkos.data(),
-        {total_n_atoms, 3},
-        torch::TensorOptions().dtype(torch::kFloat64).device(device)
-    ).to(dtype);
-
-    for (auto& cache: caches_) {
-        // half list mask, if necessary (TODO: change names! This could modify the tensors outside the loop if more than one NL!)
-        auto full_list = cache.options->full_list();
-        if (!full_list) {
-            auto half_list_mask = centers_tensor_original_id <= neighbors_tensor_original_id;
-            centers_tensor = centers_tensor.masked_select(half_list_mask);
-            neighbors_tensor = neighbors_tensor.masked_select(half_list_mask);
-            centers_tensor_original_id = centers_tensor_original_id.masked_select(half_list_mask);
-            neighbors_tensor_original_id = neighbors_tensor_original_id.masked_select(half_list_mask);
-        }
-
-        // distance mask
-        auto interatomic_vectors = positions_tensor.index_select(0, neighbors_tensor) - positions_tensor.index_select(0, centers_tensor);
-        auto distance_mask = torch::sum(interatomic_vectors.pow(2), 1) < cache.cutoff*cache.cutoff;
-
-        // index everything with the mask
-        auto centers_tensor_original_id_filtered = centers_tensor_original_id.masked_select(distance_mask);
-        auto neighbors_tensor_original_id_filtered = neighbors_tensor_original_id.masked_select(distance_mask);
-        auto interatomic_vectors_filtered = interatomic_vectors.index({distance_mask, torch::indexing::Slice()});
-
-        // find filtered interatomic vectors using the original atoms
-        auto interatomic_vectors_original_filtered = positions_tensor.index_select(0, neighbors_tensor_original_id_filtered) - positions_tensor.index_select(0, centers_tensor_original_id_filtered);
-
-        // cell shifts
-        auto pair_shifts = interatomic_vectors_filtered - interatomic_vectors_original_filtered;
-        auto cell_shifts = pair_shifts.matmul(cell_inv_tensor);
-        cell_shifts = torch::round(cell_shifts).to(torch::kInt32);
-
-        if (!full_list) {
-            auto half_list_cell_mask = centers_tensor_original_id_filtered == neighbors_tensor_original_id_filtered;
-            auto negative_half_space_mask = torch::sum(cell_shifts, 1) < 0;
-            // reproduce this mask with torch:
-            // if ((shift[0] + shift[1] + shift[2] == 0) && (shift[2] < 0 || (shift[2] == 0 && shift[1] < 0)))
-            auto edge_mask = (
-                torch::sum(cell_shifts, 1) == 0 & (
-                    cell_shifts.index({torch::indexing::Slice(), 2}) < 0 | (
-                        cell_shifts.index({torch::indexing::Slice(), 2}) == 0 &
-                        cell_shifts.index({torch::indexing::Slice(), 1}) < 0
-                    )
-                )
-            );
-            auto final_mask = torch::logical_not(half_list_cell_mask & (negative_half_space_mask | edge_mask));
-            centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.masked_select(final_mask);
-            neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.masked_select(final_mask);
-            interatomic_vectors_filtered = interatomic_vectors_filtered.index({final_mask, torch::indexing::Slice()});
-            cell_shifts = cell_shifts.index({final_mask, torch::indexing::Slice()});
-        }
-
-        centers_tensor_original_id_filtered = centers_tensor_original_id_filtered.unsqueeze(-1);
-        neighbors_tensor_original_id_filtered = neighbors_tensor_original_id_filtered.unsqueeze(-1);
-        auto samples_values = torch::concatenate({centers_tensor_original_id_filtered, neighbors_tensor_original_id_filtered, cell_shifts}, 1);
-
-        auto [samples_values_unique, samples_inverse, _] = torch::unique_dim(
-            samples_values, /*dim=*/0, /*sorted=*/true, /*return_inverse=*/true, /*return_counts=*/false
-        );
-
-        auto permutation = torch::arange(samples_inverse.size(0), samples_inverse.options());
-        samples_inverse = samples_inverse.flip({0});
-        permutation = permutation.flip({0});
-
-        auto sample_indices = torch::empty(samples_values_unique.size(0), samples_inverse.options());
-        sample_indices.scatter_(0, samples_inverse, permutation);
-
-        auto samples = torch::make_intrusive<metatensor_torch::LabelsHolder>(
-            std::vector<std::string>{"first_atom", "second_atom", "cell_shift_a", "cell_shift_b", "cell_shift_c"},
-            samples_values_unique
-        );
-
-        auto neighbor_list = torch::make_intrusive<metatensor_torch::TensorBlockHolder>(
-            interatomic_vectors_filtered.index_select(0, sample_indices).unsqueeze(-1).to(dtype).to(device),
-            samples->to(device),
-            std::vector<metatensor_torch::TorchLabels>{
-                metatensor_torch::LabelsHolder::create({"xyz"}, {{0}, {1}, {2}})->to(device),
-            },
-            metatensor_torch::LabelsHolder::create({"distance"}, {{0}})->to(device)
-        );
-
-        metatensor_torch::register_autograd_neighbors(system, neighbor_list, options_.check_consistency);
-        system->add_neighbor_list(cache.options, neighbor_list);
-    }
+template<class DeviceType>
+void MetatensorSystemAdaptorKokkos<DeviceType>::setup_neighbors_no_remap(metatensor_torch::System& system) {
+    error->all(FLERR, "The metatensor/kk requires remap_pairs to be true");  // only the remapped path exists here; report via the LAMMPS Error class, as add_nl_request does
 }
 
 
-template<class LMPDeviceType>
-metatensor_torch::System MetatensorSystemAdaptorKokkos<LMPDeviceType>::system_from_lmp(
+template<class DeviceType>
+metatensor_torch::System MetatensorSystemAdaptorKokkos<DeviceType>::system_from_lmp(
     bool do_virial,
     bool remap_pairs,
     torch::ScalarType dtype,
@@ -561,9 +338,9 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos<LMPDeviceType>::system_fr
 ) {
     auto total_n_atoms = atomKK->nlocal + atomKK->nghost;
 
-    auto atom_types_lammps_kokkos = atomKK->k_type.view<LMPDeviceType>();
+    auto atom_types_lammps_kokkos = atomKK->k_type.view<DeviceType>();
     auto mapping = options_.types_mapping_kokkos;
-    Kokkos::View<int32_t*, Kokkos::LayoutRight, LMPDeviceType> atom_types_metatensor_kokkos("atom_types_metatensor", total_n_atoms);   /// Can be a class member? (allocation alert)
+    Kokkos::View<int32_t*, Kokkos::LayoutRight, DeviceType> atom_types_metatensor_kokkos("atom_types_metatensor", total_n_atoms);
 
     Kokkos::parallel_for(
         "MetatensorSystemAdaptorKokkos::system_from_lmp::atom_types_mapping",
@@ -577,32 +354,30 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos<LMPDeviceType>::system_fr
         atom_types_metatensor_kokkos.data(),
         {total_n_atoms},
         torch::TensorOptions().dtype(torch::kInt32).device(device)
-    ).clone();  /// Again, allocation alert. Not sure if this can be avoided
+    ).clone();  // clone because the original memory belongs to Kokkos and will be deallocated
 
     // atom->x contains "real" and then ghost atoms, in that order
-    auto positions_kokkos = atomKK->k_x.view<LMPDeviceType>();
+    auto positions_kokkos = atomKK->k_x.view<DeviceType>();
     auto tensor_options_positions = torch::TensorOptions().dtype(torch::kFloat64).device(device);
     this->positions = torch::from_blob(
         positions_kokkos.data(), {total_n_atoms, 3},
         // requires_grad=true since we always need gradients w.r.t. positions
         tensor_options_positions
-    ).clone().requires_grad_(true);  /// Allocation alert (clone)
+    ).clone().requires_grad_(true);  // clone (same as above)
 
     auto tensor_options_cell = torch::TensorOptions().dtype(dtype).device(device);
-    auto cell = torch::zeros({3, 3}, tensor_options_cell);  /// Allocation alert, we could make it a class member and allocate it once
-    /// domain doesn't seem to have a Kokkos version
+    auto cell = torch::zeros({3, 3}, tensor_options_cell);  // we could make it a class member and allocate it once
+    
+    // domain doesn't seem to have a Kokkos version. We will need to transfer the cell to the device
     cell[0][0] = domain->xprd;
-
     cell[1][0] = domain->xy;
     cell[1][1] = domain->yprd;
-
     cell[2][0] = domain->xz;
     cell[2][1] = domain->yz;
     cell[2][2] = domain->zprd;
-    /// And the other elements? Are they always zero?
 
     auto system_positions = this->positions.to(dtype);
-    cell = cell.to(dtype).to(device);   /// to(device) alert. How do we find the cell on Kokkos?
+    cell = cell.to(dtype).to(device);
 
     if (do_virial) {
         auto model_strain = this->strain.to(dtype);  /// already on the correct device
@@ -630,4 +405,8 @@ metatensor_torch::System MetatensorSystemAdaptorKokkos<LMPDeviceType>::system_fr
 namespace LAMMPS_NS {
 template class MetatensorNeighborsDataKokkos<LMPDeviceType>;
 template class MetatensorSystemAdaptorKokkos<LMPDeviceType>;
+#ifdef LMP_KOKKOS_GPU
+template class MetatensorNeighborsDataKokkos<LMPHostType>;
+template class MetatensorSystemAdaptorKokkos<LMPHostType>;
+#endif
 }
diff --git a/src/KOKKOS/metatensor_system_kokkos.h b/src/KOKKOS/metatensor_system_kokkos.h
index fbd788235af..3ebea22a1fd 100644
--- a/src/KOKKOS/metatensor_system_kokkos.h
+++ b/src/KOKKOS/metatensor_system_kokkos.h
@@ -28,11 +28,11 @@
 
 namespace LAMMPS_NS {
 
-template<class LMPDeviceType>
+template<class DeviceType>
 struct MetatensorSystemOptionsKokkos {
     // Mapping from LAMMPS types to metatensor types
     const int32_t* types_mapping;
-    const Kokkos::View<int32_t*, Kokkos::LayoutRight, LMPDeviceType> types_mapping_kokkos;
+    const Kokkos::View<int32_t*, Kokkos::LayoutRight, DeviceType> types_mapping_kokkos;
     // interaction range of the model, in LAMMPS units
     double interaction_range;
     // should we run extra checks on the neighbor lists?
@@ -40,7 +40,7 @@ struct MetatensorSystemOptionsKokkos {
 };
 
 // data for metatensor neighbors lists
-template<class LMPDeviceType>
+template<class DeviceType>
 struct MetatensorNeighborsDataKokkos {
     // single neighbors sample containing [i, j, S_a, S_b, S_c]
     using sample_t = std::array<int32_t, 5>;
@@ -79,11 +79,11 @@ struct MetatensorNeighborsDataKokkos {
     std::vector<std::array<float, 3>> distances_f32;
 };
 
-template<class LMPDeviceType>
+template<class DeviceType>
 class MetatensorSystemAdaptorKokkos : public Pointers {
 public:
-    MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Pair* requestor, MetatensorSystemOptionsKokkos<LMPDeviceType> options);
-    MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Compute* requestor, MetatensorSystemOptionsKokkos<LMPDeviceType> options);
+    MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Pair* requestor, MetatensorSystemOptionsKokkos<DeviceType> options);
+    MetatensorSystemAdaptorKokkos(LAMMPS* lmp, Compute* requestor, MetatensorSystemOptionsKokkos<DeviceType> options);
 
     ~MetatensorSystemAdaptorKokkos();
 
@@ -123,13 +123,13 @@ class MetatensorSystemAdaptorKokkos : public Pointers {
 
 private:
     // options for this system adaptor
-    MetatensorSystemOptionsKokkos<LMPDeviceType> options_;
+    MetatensorSystemOptionsKokkos<DeviceType> options_;
 
     // LAMMPS NL
     NeighList* list_;
     // allocations caches for all the NL requested by
     // the model
-    std::vector<MetatensorNeighborsDataKokkos<LMPDeviceType>> caches_;
+    std::vector<MetatensorNeighborsDataKokkos<DeviceType>> caches_;
     // allocation cache for the atomic types in the system
     torch::Tensor atomic_types_;
     // allocation cache holding the "original atom" id for all atoms in the
diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp
index 5cabe04f376..c51d90de88e 100644
--- a/src/KOKKOS/pair_metatensor_kokkos.cpp
+++ b/src/KOKKOS/pair_metatensor_kokkos.cpp
@@ -57,6 +57,7 @@ class Cuda {};
 
 using namespace LAMMPS_NS;
 
+template<class DeviceType>
 struct LAMMPS_NS::PairMetatensorDataKokkos {
     PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit);
 
@@ -80,10 +81,11 @@ struct LAMMPS_NS::PairMetatensorDataKokkos {
     double max_cutoff;
 
     // adaptor from LAMMPS system to metatensor's
-    std::unique_ptr<MetatensorSystemAdaptorKokkos<LMPDeviceType>> system_adaptor;
+    std::unique_ptr<MetatensorSystemAdaptorKokkos<DeviceType>> system_adaptor;
 };
 
-PairMetatensorDataKokkos::PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit):
+template<class DeviceType>
+PairMetatensorDataKokkos<DeviceType>::PairMetatensorDataKokkos(std::string length_unit, std::string energy_unit):
     system_adaptor(nullptr),
     device(torch::kCPU),
     check_consistency(false),
@@ -106,7 +108,8 @@ PairMetatensorDataKokkos::PairMetatensorDataKokkos(std::string length_unit, std:
     this->evaluation_options->outputs.insert("energy", output);
 }
 
-void PairMetatensorDataKokkos::load_model(
+template<class DeviceType>
+void PairMetatensorDataKokkos<DeviceType>::load_model(
     LAMMPS* lmp,
     const char* path,
     const char* extensions_directory
@@ -131,7 +134,7 @@ void PairMetatensorDataKokkos::load_model(
     }
 
     auto capabilities_ivalue = this->model->run_method("capabilities");
-    this->capabilities = capabilities_ivalue.toCustomClass<metatensor_torch::ModelCapabilitiesHolder>();
+    this->capabilities = capabilities_ivalue. template toCustomClass<metatensor_torch::ModelCapabilitiesHolder>();
 
     if (!this->capabilities->outputs().contains("energy")) {
         lmp->error->all(FLERR, "the model at '{}' does not have an \"energy\" output, we can not use it in pair_style metatensor", path);
@@ -139,7 +142,7 @@ void PairMetatensorDataKokkos::load_model(
 
     if (lmp->comm->me == 0) {
         auto metadata_ivalue = this->model->run_method("metadata");
-        auto metadata = metadata_ivalue.toCustomClass<metatensor_torch::ModelMetadataHolder>();
+        auto metadata = metadata_ivalue. template toCustomClass<metatensor_torch::ModelMetadataHolder>();
         auto to_print = metadata->print();
 
         if (lmp->screen) {
@@ -161,8 +164,8 @@ void PairMetatensorDataKokkos::load_model(
 
 /* ---------------------------------------------------------------------- */
 
-template<class LMPDeviceType>
-PairMetatensorKokkos<LMPDeviceType>::PairMetatensorKokkos(LAMMPS *lmp): Pair(lmp), type_mapping(nullptr) {
+template<class DeviceType>
+PairMetatensorKokkos<DeviceType>::PairMetatensorKokkos(LAMMPS *lmp): Pair(lmp), type_mapping(nullptr) {
     std::string energy_unit;
     std::string length_unit;
     if (strcmp(update->unit_style, "real") == 0) {
@@ -185,11 +188,11 @@ PairMetatensorKokkos<LMPDeviceType>::PairMetatensorKokkos(LAMMPS *lmp): Pair(lmp
     // so we can not compute virial as fdotr
     this->no_virial_fdotr_compute = 1;
 
-    this->mts_data = new PairMetatensorDataKokkos(std::move(length_unit), std::move(energy_unit));
+    this->mts_data = new PairMetatensorDataKokkos<DeviceType>(std::move(length_unit), std::move(energy_unit));
 }
 
-template<class LMPDeviceType>
-PairMetatensorKokkos<LMPDeviceType>::~PairMetatensorKokkos() {
+template<class DeviceType>
+PairMetatensorKokkos<DeviceType>::~PairMetatensorKokkos() {
     delete this->mts_data;
 
     if (allocated) {
@@ -200,8 +203,8 @@ PairMetatensorKokkos<LMPDeviceType>::~PairMetatensorKokkos() {
 }
 
 // called when finding `pair_style metatensor` in the input
-template<class LMPDeviceType>
-void PairMetatensorKokkos<LMPDeviceType>::settings(int argc, char ** argv) {
+template<class DeviceType>
+void PairMetatensorKokkos<DeviceType>::settings(int argc, char ** argv) {
     if (argc == 0) {
         error->all(FLERR, "expected at least 1 argument to pair_style metatensor, got {}", argc);
     }
@@ -337,13 +340,13 @@ void PairMetatensorKokkos<LMPDeviceType>::settings(int argc, char ** argv) {
     mts_data->model->to(mts_data->device);
 
     // Handle potential mismatch between Kokkos and model devices
-    if (std::is_same<LMPDeviceType, Kokkos::Cuda>::value) {
+    if (std::is_same<DeviceType, Kokkos::Cuda>::value) {
         if (!mts_data->device.is_cuda()) {
             throw std::runtime_error("Kokkos is running on a GPU, but the model is not on a GPU");
         }
     } else {
         if (!mts_data->device.is_cpu()) {
-            throw std::runtime_error("Kokkos is running on the host, but the model is not on CPU");
+            throw std::runtime_error("Kokkos is running on CPU, but the model is not on CPU");
         }
     }
 
@@ -362,12 +365,12 @@ void PairMetatensorKokkos<LMPDeviceType>::settings(int argc, char ** argv) {
     // this will allow us to receive the NL in a GPU-friendly format
     this->lmp->kokkos->neigh_transpose = 1;
 
-    std::cout << "Running on " << typeid(ExecutionSpaceFromDevice<LMPDeviceType>::space).name() << std::endl;
+    std::cout << "Running on " << typeid(ExecutionSpaceFromDevice<DeviceType>::space).name() << std::endl;
 }
 
 
-template<class LMPDeviceType>
-void PairMetatensorKokkos<LMPDeviceType>::allocate() {
+template<class DeviceType>
+void PairMetatensorKokkos<DeviceType>::allocate() {
     allocated = 1;
 
     // setflags stores whether the coeff for a given pair of atom types are known
@@ -411,15 +414,15 @@ void PairMetatensorKokkos<LMPDeviceType>::allocate() {
     }
 }
 
-template<class LMPDeviceType>
-double PairMetatensorKokkos<LMPDeviceType>::init_one(int, int) {
+template<class DeviceType>
+double PairMetatensorKokkos<DeviceType>::init_one(int, int) {
     return mts_data->max_cutoff;
 }
 
 
 // called on pair_coeff
-template<class LMPDeviceType>
-void PairMetatensorKokkos<LMPDeviceType>::coeff(int argc, char ** argv) {
+template<class DeviceType>
+void PairMetatensorKokkos<DeviceType>::coeff(int argc, char ** argv) {
     if (argc < 3 || strcmp(argv[0], "*") != 0 || strcmp(argv[1], "*") != 0) {
         error->all(FLERR, "invalid pair_coeff, expected `pair_coeff * * <list of types>`");
     }
@@ -447,8 +450,8 @@ void PairMetatensorKokkos<LMPDeviceType>::coeff(int argc, char ** argv) {
 
 
 // called when the run starts
-template<class LMPDeviceType>
-void PairMetatensorKokkos<LMPDeviceType>::init_style() {
+template<class DeviceType>
+void PairMetatensorKokkos<DeviceType>::init_style() {
     // Require newton pair on since we need to communicate forces accumulated on
     // ghost atoms to neighboring domains. These forces contributions come from
     // gradient of a local descriptor w.r.t. domain ghosts (periodic images
@@ -473,7 +476,7 @@ void PairMetatensorKokkos<LMPDeviceType>::init_style() {
         // determine the maximal cutoff in the NL
         auto requested_nl = mts_data->model->run_method("requested_neighbor_lists");
         for (const auto& ivalue: requested_nl.toList()) {
-            auto options = ivalue.get().toCustomClass<metatensor_torch::NeighborListOptionsHolder>();
+            auto options = ivalue.get(). template toCustomClass<metatensor_torch::NeighborListOptionsHolder>();
             auto cutoff = options->engine_cutoff(mts_data->evaluation_options->length_unit());
 
             mts_data->max_cutoff = std::max(mts_data->max_cutoff, cutoff);
@@ -490,28 +493,30 @@ void PairMetatensorKokkos<LMPDeviceType>::init_style() {
     }
 
     /// create Kokkos view for type_mapping
-    Kokkos::View<int32_t*, Kokkos::LayoutRight, LMPDeviceType> type_mapping_kokkos("type_mapping", atom->ntypes + 1);
+    Kokkos::View<int32_t*, Kokkos::LayoutRight, DeviceType> type_mapping_kokkos("type_mapping", atomKK->ntypes + 1);
     /// copy type_mapping to the Kokkos view (via a host mirror view)
     auto type_mapping_kokkos_host = Kokkos::create_mirror_view(type_mapping_kokkos);
-    for (int i = 0; i < atom->ntypes + 1; i++) {
+    for (int i = 0; i < atomKK->ntypes + 1; i++) {
         type_mapping_kokkos_host(i) = type_mapping[i];
     }
     Kokkos::deep_copy(type_mapping_kokkos, type_mapping_kokkos_host);
 
     // create system adaptor
-    auto options = MetatensorSystemOptionsKokkos<LMPDeviceType>{
+    auto options = MetatensorSystemOptionsKokkos<DeviceType>{
         this->type_mapping,
         type_mapping_kokkos,
         mts_data->max_cutoff,
         mts_data->check_consistency,
     };
-    mts_data->system_adaptor = std::make_unique<MetatensorSystemAdaptorKokkos<LMPDeviceType>>(lmp, this, options);
+    mts_data->system_adaptor = std::make_unique<MetatensorSystemAdaptorKokkos<DeviceType>>(lmp, this, options);
+    // set up the strain on the system adaptor to the correct device to avoid an unnecessary transfer at each step
+    this->mts_data->system_adaptor->strain = torch::eye(3, torch::TensorOptions().dtype(torch::kFloat64).device(mts_data->device).requires_grad(true));
 
     // Translate from the metatensor neighbor lists requests to LAMMPS neighbor
     // lists requests.
     auto requested_nl = mts_data->model->run_method("requested_neighbor_lists");
     for (const auto& ivalue: requested_nl.toList()) {
-        auto options = ivalue.get().toCustomClass<metatensor_torch::NeighborListOptionsHolder>();
+        auto options = ivalue.get(). template toCustomClass<metatensor_torch::NeighborListOptionsHolder>();
         auto cutoff = options->engine_cutoff(mts_data->evaluation_options->length_unit());
         assert(cutoff <= mts_data->max_cutoff);
 
@@ -520,31 +525,17 @@ void PairMetatensorKokkos<LMPDeviceType>::init_style() {
 }
 
 
-template<class LMPDeviceType>
-void PairMetatensorKokkos<LMPDeviceType>::init_list(int id, NeighList *ptr) {
+template<class DeviceType>
+void PairMetatensorKokkos<DeviceType>::init_list(int id, NeighList *ptr) {
     mts_data->system_adaptor->init_list(id, ptr);
 }
 
 
-template<class LMPDeviceType>
-void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
-    // auto start = std::chrono::high_resolution_clock::now();
-    // auto end = std::chrono::high_resolution_clock::now();
-
-    // auto x = atomKK->k_x.view<LMPDeviceType>();
-    // auto h_array = Kokkos::create_mirror_view(d_array);
-    // Kokkos::deep_copy(h_array, d_array);
-    // // Print the values on the host
-    // for (int i = 0; i < 32; ++i) {
-    //     for (int j = 0; j < 3; ++j) {
-    //         std::cout << h_array(i, j) << " ";
-    //     }
-    //     std::cout << std::endl;
-    // }
-
+template<class DeviceType>
+void PairMetatensorKokkos<DeviceType>::compute(int eflag, int vflag) {
     /// Declare what we need to read from the atomKK object and what we will modify
-    this->atomKK->sync(ExecutionSpaceFromDevice<LMPDeviceType>::space, X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK);
-    this->atomKK->modified(ExecutionSpaceFromDevice<LMPDeviceType>::space, ENERGY_MASK | F_MASK | VIRIAL_MASK);
+    this->atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space, X_MASK | F_MASK | TAG_MASK | TYPE_MASK | ENERGY_MASK | VIRIAL_MASK);
+    this->atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space, ENERGY_MASK | F_MASK | VIRIAL_MASK);
 
     if (eflag || vflag) {
         ev_setup(eflag, vflag);
@@ -567,23 +558,16 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
         error->all(FLERR, "the model requested an unsupported dtype '{}'", mts_data->capabilities->dtype());
     }
 
-    // torch::cuda::synchronize();
-    // start = std::chrono::high_resolution_clock::now();
-
     // transform from LAMMPS to metatensor System
     auto system = mts_data->system_adaptor->system_from_lmp(
         static_cast<bool>(vflag_global), mts_data->remap_pairs, dtype, mts_data->device
     );
 
-    // torch::cuda::synchronize();
-    // end = std::chrono::high_resolution_clock::now();
-    // std::cout << "sys-from-lmp: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << " ms" << std::endl;
-
     // only run the calculation for atoms actually in the current domain
     auto tensor_options = torch::TensorOptions().dtype(torch::kInt32).device(mts_data->device);
     torch::Tensor selected_atoms_values = torch::stack({
-        torch::zeros({atom->nlocal}, tensor_options),
-        torch::arange(atom->nlocal, tensor_options)
+        torch::zeros({atomKK->nlocal}, tensor_options),
+        torch::arange(atomKK->nlocal, tensor_options)
     }, -1);
 
     auto selected_atoms = torch::make_intrusive<metatensor_torch::LabelsHolder>(
@@ -592,10 +576,6 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
     mts_data->evaluation_options->set_selected_atoms(selected_atoms);
 
     torch::IValue result_ivalue;
-
-    // torch::cuda::synchronize();
-    // start = std::chrono::high_resolution_clock::now();
-
     try {
         result_ivalue = mts_data->model->forward({
             std::vector<metatensor_torch::System>{system},
@@ -606,10 +586,6 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
         error->all(FLERR, "error evaluating the torch model: {}", e.what());
     }
 
-    // torch::cuda::synchronize();
-    // end = std::chrono::high_resolution_clock::now();
-    // std::cout << "Time taken forward: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << " ms" << std::endl;
-
     auto result = result_ivalue.toGenericDict();
     auto energy = result.at("energy").toCustomClass<metatensor_torch::TensorMapHolder>();
     auto energy_tensor = metatensor_torch::TensorMapHolder::block_by_id(energy, 0)->values();
@@ -619,7 +595,7 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
     torch::Tensor global_energy;
     if (eflag_atom) {
         auto energies = energy_detached.accessor<double, 2>();
-        for (int i=0; i<atom->nlocal + atom->nghost; i++) {
+        for (int i=0; i<atomKK->nlocal + atomKK->nghost; i++) {
             // TODO: handle out of order samples
             eatom[i] += energies[i][0];
         }
@@ -640,24 +616,15 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
     mts_data->system_adaptor->strain.mutable_grad() = torch::Tensor();
 
     // compute forces/virial with backward propagation
-
-    // torch::cuda::synchronize();
-    // start = std::chrono::high_resolution_clock::now();
-
     energy_tensor.backward(-torch::ones_like(energy_tensor));
 
-    // torch::cuda::synchronize();
-    // end = std::chrono::high_resolution_clock::now();
-    // std::cout << "Time taken backward: " << std::chrono::duration_cast<std::chrono::microseconds>(end - start).count() / 1000.0 << " ms" << std::endl;
-
     auto forces_tensor = mts_data->system_adaptor->positions.grad();
     assert(forces_tensor.scalar_type() == torch::kFloat64);
 
-    auto forces_lammps_kokkos = this->atomKK->k_f. template view<LMPDeviceType>();
-    /// Is it possible to do double*[3] here?
-    auto forces_metatensor_kokkos = Kokkos::View<double**, Kokkos::LayoutRight, LMPDeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged>>(forces_tensor.contiguous().data_ptr<double>(), atom->nlocal + atom->nghost, 3);
+    auto forces_lammps_kokkos = this->atomKK->k_f. template view<DeviceType>();
+    auto forces_metatensor_kokkos = Kokkos::View<double**, Kokkos::LayoutRight, DeviceType, Kokkos::MemoryTraits<Kokkos::Unmanaged>>(forces_tensor.contiguous(). template data_ptr<double>(), atomKK->nlocal + atomKK->nghost, 3);
 
-    Kokkos::parallel_for("PairMetatensorKokkos::compute::force_accumulation", atom->nlocal + atom->nghost, KOKKOS_LAMBDA(const int i) {
+    Kokkos::parallel_for("PairMetatensorKokkos::compute::force_accumulation", atomKK->nlocal + atomKK->nghost, KOKKOS_LAMBDA(const int i) {
         forces_lammps_kokkos(i, 0) += forces_metatensor_kokkos(i, 0);
         forces_lammps_kokkos(i, 1) += forces_metatensor_kokkos(i, 1);
         forces_lammps_kokkos(i, 2) += forces_metatensor_kokkos(i, 2);
@@ -670,9 +637,9 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
         assert(virial_tensor.scalar_type() == torch::kFloat64);
 
         // apparently the cell is not supported in Kokkos format,
-        // so it has to be updated on CPU (??)
+        // so it has to be updated on CPU
         auto predicted_virial_tensor_cpu = virial_tensor.cpu();
-        auto predicted_virial = predicted_virial_tensor_cpu.accessor<double, 2>();
+        auto predicted_virial = predicted_virial_tensor_cpu. template accessor<double, 2>();
 
         virial[0] += predicted_virial[0][0];
         virial[1] += predicted_virial[1][1];
@@ -690,5 +657,7 @@ void PairMetatensorKokkos<LMPDeviceType>::compute(int eflag, int vflag) {
 
 namespace LAMMPS_NS {
 template class PairMetatensorKokkos<LMPDeviceType>;
-/// TODO: Host version
+#ifdef LMP_KOKKOS_GPU
+template class PairMetatensorKokkos<LMPHostType>;
+#endif
 }
diff --git a/src/KOKKOS/pair_metatensor_kokkos.h b/src/KOKKOS/pair_metatensor_kokkos.h
index 8f5f144cec5..4dbd199810b 100644
--- a/src/KOKKOS/pair_metatensor_kokkos.h
+++ b/src/KOKKOS/pair_metatensor_kokkos.h
@@ -19,21 +19,21 @@ PairStyle(metatensor/kk, PairMetatensorKokkos<LMPDeviceType>);
 #ifndef LMP_PAIR_METATENSOR_KOKKOS_H
 #define LMP_PAIR_METATENSOR_KOKKOS_H
 
-#include "kokkos_base.h"
 #include "pair_kokkos.h"
 
 namespace LAMMPS_NS {
 
-template<class LMPDeviceType>
+template<class DeviceType>
 class MetatensorSystemAdaptorKokkos;
 
+template<class DeviceType>
 struct PairMetatensorDataKokkos;
 
 /// I noticed that most other kokkos packages inherit from their non-kokkos
 /// counterparts. It doesn't look like a good idea to me because
 /// they end up overriding everything... Not doing it here for now.
-template<class LMPDeviceType>
-class PairMetatensorKokkos : public Pair, public KokkosBase {
+template<class DeviceType>
+class PairMetatensorKokkos : public Pair {
 public:
     PairMetatensorKokkos(class LAMMPS *);
     ~PairMetatensorKokkos();
@@ -47,7 +47,7 @@ class PairMetatensorKokkos : public Pair, public KokkosBase {
 
     void allocate();
 private:
-    PairMetatensorDataKokkos* mts_data;
+    PairMetatensorDataKokkos<DeviceType>* mts_data;
 
     // mapping from LAMMPS types to metatensor types
     int32_t* type_mapping;

From a2a358f4d915159869e8f678fe4241bb4265c4ea Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Thu, 24 Oct 2024 13:56:23 +0200
Subject: [PATCH 08/15] Partial clean-up of the examples folder

---
 .../metatensor/log.26Jun2024.metatensor.g++.1 |  135 ++
 .../metatensor/log.26Jun2024.metatensor.g++.4 |  135 ++
 examples/PACKAGES/metatensor/logg             | 1415 -----------------
 3 files changed, 270 insertions(+), 1415 deletions(-)
 create mode 100644 examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.1
 create mode 100644 examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.4
 delete mode 100644 examples/PACKAGES/metatensor/logg

diff --git a/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.1 b/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.1
new file mode 100644
index 00000000000..e400bfa0886
--- /dev/null
+++ b/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.1
@@ -0,0 +1,135 @@
+LAMMPS (17 Apr 2024 - Development - patch_17Apr2024-557-gef1630afd2)
+  using 1 OpenMP thread(s) per MPI task
+units metal
+boundary p p p
+
+atom_style atomic
+lattice fcc 3.6
+Lattice spacing in x,y,z = 3.6 3.6 3.6
+region box block 0 2 0 2 0 2
+create_box 1 box
+Created orthogonal box = (0 0 0) to (7.2 7.2 7.2)
+  1 by 1 by 1 MPI processor grid
+create_atoms 1 box
+Created 32 atoms
+  using lattice units in orthogonal box = (0 0 0) to (7.2 7.2 7.2)
+  create_atoms CPU = 0.000 seconds
+
+labelmap atom 1 Ni
+mass Ni 58.693
+
+velocity all create 123 42
+
+pair_style metatensor nickel-lj.pt
+
+This is the Test Lennard-Jones model
+====================================
+
+Minimal shifted Lennard-Jones potential, to be used when testing the
+integration of metatensor atomistic models with various simulation engines.
+
+Model authors
+-------------
+
+- Guillaume Fraux <guillaume.fraux@epfl.ch>
+
+Model references
+----------------
+
+Please cite the following references when using this model:
+- about this specific model:
+  * https://github.com/luthaf/metatensor-lj-test
+- about the implementation of this model:
+  * https://github.com/lab-cosmo/metatensor
+
+Running simulation on cpu device with float64 data
+# pair_style metatensor nickel-lj-extensions.pt extensions collected-extensions/
+pair_coeff * * 28
+
+timestep 0.001
+fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0
+fix 1 all npt temp 123 123 0.10000000000000000555 iso 0 0 $(1000 * dt) drag 1.0
+fix 1 all npt temp 123 123 0.10000000000000000555 iso 0 0 1 drag 1.0
+
+thermo 10
+thermo_style custom step temp pe etotal press vol
+
+# dump 1 all atom 10 dump.metatensor
+
+run 100
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
+
+@Article{Gissinger24,
+ author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
+ title = {Type Label Framework for Bonded Force Fields in LAMMPS},
+ journal = {J. Phys. Chem. B},
+ year =    2024,
+ volume =  128,
+ number =  13,
+ pages =   {3282–-3297}
+}
+
+- https://github.com/lab-cosmo/metatensor
+- https://github.com/luthaf/metatensor-lj-test
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 8.5
+  ghost atom cutoff = 8.5
+  binsize = 4.25, bins = 2 2 2
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair metatensor, perpetual
+      attributes: full, newton on, ghost
+      pair build: full/bin/ghost
+      stencil: full/ghost/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.377 | 3.377 | 3.377 Mbytes
+   Step          Temp          PotEng         TotEng         Press          Volume    
+         0   123           -8.2814195     -7.7885506     -67585.536      373.248      
+        10   124.8498      -8.395127      -7.8948458     -68884.117      370.7507     
+        20   130.60229     -8.7447028     -8.221371      -72913.372      363.3685     
+        30   140.95014     -9.3595663     -8.79477       -80162.936      351.36119    
+        40   157.33663     -10.29346      -9.6630017     -91879.643      335.18851    
+        50   181.74279     -11.619487     -10.891232     -108735.54      315.60425    
+        60   216.75162     -13.405317     -12.536779     -131438.83      293.736      
+        70   264.39963     -15.685874     -14.626408     -160402.97      270.99304    
+        80   319.4713      -18.40371      -17.123568     -192237.03      248.74525    
+        90   350.37789     -21.272294     -19.868307     -215596.99      227.98439    
+       100   298.01005     -23.674365     -22.48022      -206922.9       209.26415    
+Loop time of 4.01198 on 1 procs for 100 steps with 32 atoms
+
+Performance: 2.154 ns/day, 11.144 hours/ns, 24.925 timesteps/s, 797.611 atom-step/s
+99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 3.9962     | 3.9962     | 3.9962     |   0.0 | 99.61
+Neigh   | 0.01366    | 0.01366    | 0.01366    |   0.0 |  0.34
+Comm    | 0.00055756 | 0.00055756 | 0.00055756 |   0.0 |  0.01
+Output  | 0.00016915 | 0.00016915 | 0.00016915 |   0.0 |  0.00
+Modify  | 0.001224   | 0.001224   | 0.001224   |   0.0 |  0.03
+Other   |            | 0.00021    |            |       |  0.01
+
+Nlocal:             32 ave          32 max          32 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:           1655 ave        1655 max        1655 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:              0 ave           0 max           0 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+FullNghs:        11490 ave       11490 max       11490 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 11490
+Ave neighs/atom = 359.0625
+Neighbor list builds = 2
+Dangerous builds = 0
+Total wall time: 0:00:04
diff --git a/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.4 b/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.4
new file mode 100644
index 00000000000..d6dbff0d78e
--- /dev/null
+++ b/examples/PACKAGES/metatensor/log.26Jun2024.metatensor.g++.4
@@ -0,0 +1,135 @@
+LAMMPS (17 Apr 2024 - Development - patch_17Apr2024-557-gef1630afd2)
+  using 1 OpenMP thread(s) per MPI task
+units metal
+boundary p p p
+
+atom_style atomic
+lattice fcc 3.6
+Lattice spacing in x,y,z = 3.6 3.6 3.6
+region box block 0 2 0 2 0 2
+create_box 1 box
+Created orthogonal box = (0 0 0) to (7.2 7.2 7.2)
+  1 by 2 by 2 MPI processor grid
+create_atoms 1 box
+Created 32 atoms
+  using lattice units in orthogonal box = (0 0 0) to (7.2 7.2 7.2)
+  create_atoms CPU = 0.001 seconds
+
+labelmap atom 1 Ni
+mass Ni 58.693
+
+velocity all create 123 42
+
+pair_style metatensor nickel-lj.pt
+
+This is the Test Lennard-Jones model
+====================================
+
+Minimal shifted Lennard-Jones potential, to be used when testing the
+integration of metatensor atomistic models with various simulation engines.
+
+Model authors
+-------------
+
+- Guillaume Fraux <guillaume.fraux@epfl.ch>
+
+Model references
+----------------
+
+Please cite the following references when using this model:
+- about this specific model:
+  * https://github.com/luthaf/metatensor-lj-test
+- about the implementation of this model:
+  * https://github.com/lab-cosmo/metatensor
+
+Running simulation on cpu device with float64 data
+# pair_style metatensor nickel-lj-extensions.pt extensions collected-extensions/
+pair_coeff * * 28
+
+timestep 0.001
+fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0
+fix 1 all npt temp 123 123 0.10000000000000000555 iso 0 0 $(1000 * dt) drag 1.0
+fix 1 all npt temp 123 123 0.10000000000000000555 iso 0 0 1 drag 1.0
+
+thermo 10
+thermo_style custom step temp pe etotal press vol
+
+# dump 1 all atom 10 dump.metatensor
+
+run 100
+
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Your simulation uses code contributions which should be cited:
+
+- Type Label Framework: https://doi.org/10.1021/acs.jpcb.3c08419
+
+@Article{Gissinger24,
+ author = {Jacob R. Gissinger, Ilia Nikiforov, Yaser Afshar, Brendon Waters, Moon-ki Choi, Daniel S. Karls, Alexander Stukowski, Wonpil Im, Hendrik Heinz, Axel Kohlmeyer, and Ellad B. Tadmor},
+ title = {Type Label Framework for Bonded Force Fields in LAMMPS},
+ journal = {J. Phys. Chem. B},
+ year =    2024,
+ volume =  128,
+ number =  13,
+ pages =   {3282–-3297}
+}
+
+- https://github.com/lab-cosmo/metatensor
+- https://github.com/luthaf/metatensor-lj-test
+CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE-CITE
+
+Generated 0 of 0 mixed pair_coeff terms from geometric mixing rule
+Neighbor list info ...
+  update: every = 1 steps, delay = 0 steps, check = yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 8.5
+  ghost atom cutoff = 8.5
+  binsize = 4.25, bins = 2 2 2
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair metatensor, perpetual
+      attributes: full, newton on, ghost
+      pair build: full/bin/ghost
+      stencil: full/ghost/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 3.359 | 3.359 | 3.359 Mbytes
+   Step          Temp          PotEng         TotEng         Press          Volume    
+         0   123           -8.2814195     -7.7885506     -67585.536      373.248      
+        10   124.79957     -8.3949245     -7.8948446     -68883.161      370.7507     
+        20   130.37558     -8.7437731     -8.2213497     -72909.223      363.36859    
+        30   140.35202     -9.3570472     -8.7946476     -80147.272      351.36165    
+        40   156.04321     -10.287834     -9.6625589     -91867.312      335.19011    
+        50   179.24129     -11.608375     -10.890143     -108707.41      315.60758    
+        60   212.26895     -13.385134     -12.534558     -131437.18      293.74143    
+        70   257.12553     -15.653741     -14.623422     -160605.1       270.99882    
+        80   309.97318     -18.367667     -17.125584     -193391.56      248.74226    
+        90   345.50571     -21.286382     -19.901918     -220089.9       227.94171    
+       100   318.19414     -23.90463      -22.629605     -220782.88      209.09539    
+Loop time of 4.16296 on 4 procs for 100 steps with 32 atoms
+
+Performance: 2.075 ns/day, 11.564 hours/ns, 24.021 timesteps/s, 768.684 atom-step/s
+97.4% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 3.8847     | 3.9666     | 4.0708     |   3.8 | 95.28
+Neigh   | 0.008984   | 0.0098269  | 0.012274   |   1.4 |  0.24
+Comm    | 0.060256   | 0.16685    | 0.24951    |  18.7 |  4.01
+Output  | 0.00014112 | 0.0021948  | 0.0083534  |   7.6 |  0.05
+Modify  | 0.010689   | 0.016268   | 0.018145   |   2.5 |  0.39
+Other   |            | 0.001203   |            |       |  0.03
+
+Nlocal:              8 ave          10 max           6 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+Nghost:           1259 ave        1261 max        1257 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+Neighs:              0 ave           0 max           0 min
+Histogram: 4 0 0 0 0 0 0 0 0 0
+FullNghs:         2891 ave        3614 max        2167 min
+Histogram: 2 0 0 0 0 0 0 0 0 2
+
+Total # of neighbors = 11564
+Ave neighs/atom = 361.375
+Neighbor list builds = 2
+Dangerous builds = 0
+Total wall time: 0:00:04
diff --git a/examples/PACKAGES/metatensor/logg b/examples/PACKAGES/metatensor/logg
deleted file mode 100644
index d66c31e6a13..00000000000
--- a/examples/PACKAGES/metatensor/logg
+++ /dev/null
@@ -1,1415 +0,0 @@
-
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 11.588ms
-         creating samples Labels (287466 pairs) ... took 26.1818ms
-         moving neighbor data to dtype/device ... took 37.6156ms
-         creating neighbors TensorBlock ... took 7.37964ms
-      converting neighbors without ghosts remapping took 82.9443ms
-   creating System from LAMMPS data took 129.204ms
-   running Model::forward ... took 346.452ms
-   running Model::backward ... took 45.4164ms
-   storing model output in LAMMPS data structures ... took 0.061583ms
-PairMetatensor::compute took 534.32ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.74904ms
-         creating samples Labels (287466 pairs) ... took 36.0691ms
-         moving neighbor data to dtype/device ... took 45.8921ms
-         creating neighbors TensorBlock ... took 0.119523ms
-      converting neighbors without ghosts remapping took 85.9571ms
-   creating System from LAMMPS data took 89.8463ms
-   running Model::forward ... took 30.4214ms
-   running Model::backward ... took 6.7433ms
-   storing model output in LAMMPS data structures ... took 0.058992ms
-PairMetatensor::compute took 147.152ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.91786ms
-         creating samples Labels (287466 pairs) ... took 24.7922ms
-         moving neighbor data to dtype/device ... took 46.1817ms
-         creating neighbors TensorBlock ... took 0.154812ms
-      converting neighbors without ghosts remapping took 75.1849ms
-   creating System from LAMMPS data took 79.0453ms
-   running Model::forward ... took 83.0292ms
-   running Model::backward ... took 89.7461ms
-   storing model output in LAMMPS data structures ... took 0.06542ms
-PairMetatensor::compute took 273.659ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.82377ms
-         creating samples Labels (287466 pairs) ... took 22.4251ms
-         moving neighbor data to dtype/device ... took 45.8675ms
-         creating neighbors TensorBlock ... took 0.126732ms
-      converting neighbors without ghosts remapping took 72.3638ms
-   creating System from LAMMPS data took 76.2302ms
-   running Model::forward ... took 0.791876ms
-   running Model::backward ... took 4.42353ms
-   storing model output in LAMMPS data structures ... took 0.059108ms
-PairMetatensor::compute took 101.871ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.90645ms
-         creating samples Labels (287466 pairs) ... took 20.9111ms
-         moving neighbor data to dtype/device ... took 44.6434ms
-         creating neighbors TensorBlock ... took 0.111157ms
-      converting neighbors without ghosts remapping took 69.6583ms
-   creating System from LAMMPS data took 73.4327ms
-   running Model::forward ... took 0.794495ms
-   running Model::backward ... took 4.41927ms
-   storing model output in LAMMPS data structures ... took 0.059082ms
-PairMetatensor::compute took 100.097ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.82388ms
-         creating samples Labels (287466 pairs) ... took 21.707ms
-         moving neighbor data to dtype/device ... took 45.2102ms
-         creating neighbors TensorBlock ... took 0.113546ms
-      converting neighbors without ghosts remapping took 70.9416ms
-   creating System from LAMMPS data took 74.8743ms
-   running Model::forward ... took 0.796907ms
-   running Model::backward ... took 5.37424ms
-   storing model output in LAMMPS data structures ... took 0.07182ms
-PairMetatensor::compute took 101.204ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.87195ms
-         creating samples Labels (287466 pairs) ... took 21.1455ms
-         moving neighbor data to dtype/device ... took 45.696ms
-         creating neighbors TensorBlock ... took 0.110128ms
-      converting neighbors without ghosts remapping took 70.9161ms
-   creating System from LAMMPS data took 74.8993ms
-   running Model::forward ... took 0.786076ms
-   running Model::backward ... took 5.39018ms
-   storing model output in LAMMPS data structures ... took 0.058135ms
-PairMetatensor::compute took 102.041ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.18867ms
-         creating samples Labels (287466 pairs) ... took 22.816ms
-         moving neighbor data to dtype/device ... took 49.7625ms
-         creating neighbors TensorBlock ... took 0.106828ms
-      converting neighbors without ghosts remapping took 76.9631ms
-   creating System from LAMMPS data took 80.7761ms
-   running Model::forward ... took 0.799495ms
-   running Model::backward ... took 4.40531ms
-   storing model output in LAMMPS data structures ... took 0.0666ms
-PairMetatensor::compute took 107.403ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.17782ms
-         creating samples Labels (287466 pairs) ... took 22.7238ms
-         moving neighbor data to dtype/device ... took 46.4673ms
-         creating neighbors TensorBlock ... took 0.124666ms
-      converting neighbors without ghosts remapping took 73.5818ms
-   creating System from LAMMPS data took 77.4117ms
-   running Model::forward ... took 0.786817ms
-   running Model::backward ... took 4.40362ms
-   storing model output in LAMMPS data structures ... took 0.059165ms
-PairMetatensor::compute took 103.758ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.30216ms
-         creating samples Labels (287466 pairs) ... took 24.2498ms
-         moving neighbor data to dtype/device ... took 48.4229ms
-         creating neighbors TensorBlock ... took 0.158321ms
-      converting neighbors without ghosts remapping took 77.2592ms
-   creating System from LAMMPS data took 81.0686ms
-   running Model::forward ... took 0.795704ms
-   running Model::backward ... took 5.35188ms
-   storing model output in LAMMPS data structures ... took 0.065461ms
-PairMetatensor::compute took 107.435ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.9266ms
-         creating samples Labels (287466 pairs) ... took 21.134ms
-         moving neighbor data to dtype/device ... took 45.3357ms
-         creating neighbors TensorBlock ... took 0.11882ms
-      converting neighbors without ghosts remapping took 70.6304ms
-   creating System from LAMMPS data took 74.4252ms
-   running Model::forward ... took 0.796698ms
-   running Model::backward ... took 4.43267ms
-   storing model output in LAMMPS data structures ... took 0.05924ms
-PairMetatensor::compute took 99.8232ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.77185ms
-         creating samples Labels (287466 pairs) ... took 21.1276ms
-         moving neighbor data to dtype/device ... took 44.7475ms
-         creating neighbors TensorBlock ... took 0.120111ms
-      converting neighbors without ghosts remapping took 69.8644ms
-   creating System from LAMMPS data took 73.6641ms
-   running Model::forward ... took 0.800139ms
-   running Model::backward ... took 5.3336ms
-   storing model output in LAMMPS data structures ... took 0.058551ms
-PairMetatensor::compute took 99.9573ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.11082ms
-         creating samples Labels (287466 pairs) ... took 21.8182ms
-         moving neighbor data to dtype/device ... took 46.0812ms
-         creating neighbors TensorBlock ... took 0.123661ms
-      converting neighbors without ghosts remapping took 72.2505ms
-   creating System from LAMMPS data took 76.197ms
-   running Model::forward ... took 0.803314ms
-   running Model::backward ... took 4.38621ms
-   storing model output in LAMMPS data structures ... took 0.059434ms
-PairMetatensor::compute took 102.593ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.16883ms
-         creating samples Labels (287466 pairs) ... took 23.0877ms
-         moving neighbor data to dtype/device ... took 45.8524ms
-         creating neighbors TensorBlock ... took 0.11302ms
-      converting neighbors without ghosts remapping took 73.3361ms
-   creating System from LAMMPS data took 77.0976ms
-   running Model::forward ... took 0.789521ms
-   running Model::backward ... took 5.35487ms
-   storing model output in LAMMPS data structures ... took 0.061607ms
-PairMetatensor::compute took 104.648ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.1771ms
-         creating samples Labels (287466 pairs) ... took 23.5011ms
-         moving neighbor data to dtype/device ... took 47.459ms
-         creating neighbors TensorBlock ... took 0.111697ms
-      converting neighbors without ghosts remapping took 75.3441ms
-   creating System from LAMMPS data took 79.1161ms
-   running Model::forward ... took 0.812649ms
-   running Model::backward ... took 5.39704ms
-   storing model output in LAMMPS data structures ... took 0.058938ms
-PairMetatensor::compute took 107.458ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.27067ms
-         creating samples Labels (287466 pairs) ... took 23.2998ms
-         moving neighbor data to dtype/device ... took 46.1408ms
-         creating neighbors TensorBlock ... took 0.158251ms
-      converting neighbors without ghosts remapping took 73.9736ms
-   creating System from LAMMPS data took 77.8013ms
-   running Model::forward ... took 0.791141ms
-   running Model::backward ... took 4.39856ms
-   storing model output in LAMMPS data structures ... took 0.057344ms
-PairMetatensor::compute took 104.539ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.22692ms
-         creating samples Labels (287466 pairs) ... took 22.6665ms
-         moving neighbor data to dtype/device ... took 45.9093ms
-         creating neighbors TensorBlock ... took 0.124772ms
-      converting neighbors without ghosts remapping took 73.0163ms
-   creating System from LAMMPS data took 76.8416ms
-   running Model::forward ... took 0.789668ms
-   running Model::backward ... took 4.42614ms
-   storing model output in LAMMPS data structures ... took 0.058491ms
-PairMetatensor::compute took 103.312ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.26549ms
-         creating samples Labels (287466 pairs) ... took 22.8947ms
-         moving neighbor data to dtype/device ... took 47.2927ms
-         creating neighbors TensorBlock ... took 0.154429ms
-      converting neighbors without ghosts remapping took 74.7127ms
-   creating System from LAMMPS data took 78.5063ms
-   running Model::forward ... took 0.798138ms
-   running Model::backward ... took 4.42496ms
-   storing model output in LAMMPS data structures ... took 0.058954ms
-PairMetatensor::compute took 105.307ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.38523ms
-         creating samples Labels (287466 pairs) ... took 23.1254ms
-         moving neighbor data to dtype/device ... took 45.8289ms
-         creating neighbors TensorBlock ... took 0.112197ms
-      converting neighbors without ghosts remapping took 73.5391ms
-   creating System from LAMMPS data took 77.3355ms
-   running Model::forward ... took 0.793411ms
-   running Model::backward ... took 4.46711ms
-   storing model output in LAMMPS data structures ... took 0.069233ms
-PairMetatensor::compute took 104.143ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.2998ms
-         creating samples Labels (287466 pairs) ... took 22.5969ms
-         moving neighbor data to dtype/device ... took 47.2391ms
-         creating neighbors TensorBlock ... took 0.106886ms
-      converting neighbors without ghosts remapping took 74.3358ms
-   creating System from LAMMPS data took 78.1345ms
-   running Model::forward ... took 0.819647ms
-   running Model::backward ... took 5.43234ms
-   storing model output in LAMMPS data structures ... took 0.058084ms
-PairMetatensor::compute took 105.96ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.16364ms
-         creating samples Labels (287466 pairs) ... took 27.3423ms
-         moving neighbor data to dtype/device ... took 47.4395ms
-         creating neighbors TensorBlock ... took 0.105556ms
-      converting neighbors without ghosts remapping took 79.1362ms
-   creating System from LAMMPS data took 82.9187ms
-   running Model::forward ... took 0.795955ms
-   running Model::backward ... took 4.45544ms
-   storing model output in LAMMPS data structures ... took 0.057359ms
-PairMetatensor::compute took 109.117ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.20311ms
-         creating samples Labels (287466 pairs) ... took 22.9165ms
-         moving neighbor data to dtype/device ... took 47.7985ms
-         creating neighbors TensorBlock ... took 0.109164ms
-      converting neighbors without ghosts remapping took 75.1157ms
-   creating System from LAMMPS data took 78.8708ms
-   running Model::forward ... took 0.837701ms
-   running Model::backward ... took 4.43931ms
-   storing model output in LAMMPS data structures ... took 0.060665ms
-PairMetatensor::compute took 105.683ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.28898ms
-         creating samples Labels (287466 pairs) ... took 22.5997ms
-         moving neighbor data to dtype/device ... took 47.1497ms
-         creating neighbors TensorBlock ... took 0.107474ms
-      converting neighbors without ghosts remapping took 74.2347ms
-   creating System from LAMMPS data took 78.023ms
-   running Model::forward ... took 0.826464ms
-   running Model::backward ... took 5.42335ms
-   storing model output in LAMMPS data structures ... took 0.057351ms
-PairMetatensor::compute took 105.857ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.28581ms
-         creating samples Labels (287466 pairs) ... took 23.2556ms
-         moving neighbor data to dtype/device ... took 46.0997ms
-         creating neighbors TensorBlock ... took 0.115791ms
-      converting neighbors without ghosts remapping took 73.8493ms
-   creating System from LAMMPS data took 77.6586ms
-   running Model::forward ... took 0.812054ms
-   running Model::backward ... took 4.43172ms
-   storing model output in LAMMPS data structures ... took 0.05824ms
-PairMetatensor::compute took 104.459ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.12055ms
-         creating samples Labels (287466 pairs) ... took 21.2199ms
-         moving neighbor data to dtype/device ... took 44.8198ms
-         creating neighbors TensorBlock ... took 0.110947ms
-      converting neighbors without ghosts remapping took 70.3644ms
-   creating System from LAMMPS data took 74.3517ms
-   running Model::forward ... took 0.798389ms
-   running Model::backward ... took 5.44111ms
-   storing model output in LAMMPS data structures ... took 0.06137ms
-PairMetatensor::compute took 102.684ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.16484ms
-         creating samples Labels (287466 pairs) ... took 23.647ms
-         moving neighbor data to dtype/device ... took 47.7067ms
-         creating neighbors TensorBlock ... took 0.110107ms
-      converting neighbors without ghosts remapping took 75.718ms
-   creating System from LAMMPS data took 79.4892ms
-   running Model::forward ... took 0.793688ms
-   running Model::backward ... took 4.43442ms
-   storing model output in LAMMPS data structures ... took 0.060361ms
-PairMetatensor::compute took 106.69ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.18171ms
-         creating samples Labels (287466 pairs) ... took 26.8085ms
-         moving neighbor data to dtype/device ... took 47.423ms
-         creating neighbors TensorBlock ... took 0.106612ms
-      converting neighbors without ghosts remapping took 78.6078ms
-   creating System from LAMMPS data took 82.3605ms
-   running Model::forward ... took 0.794912ms
-   running Model::backward ... took 5.44181ms
-   storing model output in LAMMPS data structures ... took 0.060014ms
-PairMetatensor::compute took 109.62ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.22628ms
-         creating samples Labels (287466 pairs) ... took 22.849ms
-         moving neighbor data to dtype/device ... took 47.0228ms
-         creating neighbors TensorBlock ... took 0.124639ms
-      converting neighbors without ghosts remapping took 74.3099ms
-   creating System from LAMMPS data took 78.0758ms
-   running Model::forward ... took 0.80199ms
-   running Model::backward ... took 5.4321ms
-   storing model output in LAMMPS data structures ... took 0.059076ms
-PairMetatensor::compute took 106.135ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.18387ms
-         creating samples Labels (287466 pairs) ... took 22.66ms
-         moving neighbor data to dtype/device ... took 46.8876ms
-         creating neighbors TensorBlock ... took 0.11875ms
-      converting neighbors without ghosts remapping took 73.9685ms
-   creating System from LAMMPS data took 77.7805ms
-   running Model::forward ... took 0.797688ms
-   running Model::backward ... took 4.43546ms
-   storing model output in LAMMPS data structures ... took 0.059668ms
-PairMetatensor::compute took 103.278ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.87171ms
-         creating samples Labels (287466 pairs) ... took 21.2273ms
-         moving neighbor data to dtype/device ... took 45.0825ms
-         creating neighbors TensorBlock ... took 0.115807ms
-      converting neighbors without ghosts remapping took 70.3943ms
-   creating System from LAMMPS data took 74.1551ms
-   running Model::forward ... took 0.808445ms
-   running Model::backward ... took 4.37139ms
-   storing model output in LAMMPS data structures ... took 0.064335ms
-PairMetatensor::compute took 100.791ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.17737ms
-         creating samples Labels (287466 pairs) ... took 22.8726ms
-         moving neighbor data to dtype/device ... took 44.8031ms
-         creating neighbors TensorBlock ... took 0.107526ms
-      converting neighbors without ghosts remapping took 72.0471ms
-   creating System from LAMMPS data took 75.7925ms
-   running Model::forward ... took 0.798661ms
-   running Model::backward ... took 4.46603ms
-   storing model output in LAMMPS data structures ... took 0.059673ms
-PairMetatensor::compute took 102.565ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.15153ms
-         creating samples Labels (287466 pairs) ... took 23.5642ms
-         moving neighbor data to dtype/device ... took 46.0935ms
-         creating neighbors TensorBlock ... took 0.112029ms
-      converting neighbors without ghosts remapping took 74.0067ms
-   creating System from LAMMPS data took 77.8098ms
-   running Model::forward ... took 0.792758ms
-   running Model::backward ... took 4.4473ms
-   storing model output in LAMMPS data structures ... took 0.057952ms
-PairMetatensor::compute took 104.104ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.32375ms
-         creating samples Labels (287466 pairs) ... took 23.0384ms
-         moving neighbor data to dtype/device ... took 47.157ms
-         creating neighbors TensorBlock ... took 0.107811ms
-      converting neighbors without ghosts remapping took 74.7455ms
-   creating System from LAMMPS data took 78.5296ms
-   running Model::forward ... took 0.827014ms
-   running Model::backward ... took 5.48426ms
-   storing model output in LAMMPS data structures ... took 0.060777ms
-PairMetatensor::compute took 106.374ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.19645ms
-         creating samples Labels (287466 pairs) ... took 22.9036ms
-         moving neighbor data to dtype/device ... took 47.1023ms
-         creating neighbors TensorBlock ... took 0.116411ms
-      converting neighbors without ghosts remapping took 74.4102ms
-   creating System from LAMMPS data took 78.1717ms
-   running Model::forward ... took 0.795452ms
-   running Model::backward ... took 5.42579ms
-   storing model output in LAMMPS data structures ... took 0.058078ms
-PairMetatensor::compute took 105.425ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.23505ms
-         creating samples Labels (287466 pairs) ... took 22.9348ms
-         moving neighbor data to dtype/device ... took 47.0822ms
-         creating neighbors TensorBlock ... took 0.111727ms
-      converting neighbors without ghosts remapping took 74.4538ms
-   creating System from LAMMPS data took 78.1915ms
-   running Model::forward ... took 0.791693ms
-   running Model::backward ... took 4.43297ms
-   storing model output in LAMMPS data structures ... took 0.057177ms
-PairMetatensor::compute took 104.943ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.25868ms
-         creating samples Labels (287466 pairs) ... took 23.5388ms
-         moving neighbor data to dtype/device ... took 47.6249ms
-         creating neighbors TensorBlock ... took 0.107923ms
-      converting neighbors without ghosts remapping took 75.6246ms
-   creating System from LAMMPS data took 79.3737ms
-   running Model::forward ... took 0.799768ms
-   running Model::backward ... took 4.45003ms
-   storing model output in LAMMPS data structures ... took 0.059326ms
-PairMetatensor::compute took 105.848ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.05246ms
-         creating samples Labels (287466 pairs) ... took 22.8078ms
-         moving neighbor data to dtype/device ... took 45.0724ms
-         creating neighbors TensorBlock ... took 0.111755ms
-      converting neighbors without ghosts remapping took 72.1313ms
-   creating System from LAMMPS data took 75.8856ms
-   running Model::forward ... took 0.796093ms
-   running Model::backward ... took 4.43689ms
-   storing model output in LAMMPS data structures ... took 0.058598ms
-PairMetatensor::compute took 102.295ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.13317ms
-         creating samples Labels (287475 pairs) ... took 23.3032ms
-         moving neighbor data to dtype/device ... took 46.696ms
-         creating neighbors TensorBlock ... took 0.153897ms
-      converting neighbors without ghosts remapping took 74.3901ms
-   creating System from LAMMPS data took 78.159ms
-   running Model::forward ... took 0.829225ms
-   running Model::backward ... took 5.51044ms
-   storing model output in LAMMPS data structures ... took 0.057366ms
-PairMetatensor::compute took 106.044ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.09002ms
-         creating samples Labels (287482 pairs) ... took 23.5512ms
-         moving neighbor data to dtype/device ... took 48.0461ms
-         creating neighbors TensorBlock ... took 0.152398ms
-      converting neighbors without ghosts remapping took 75.9422ms
-   creating System from LAMMPS data took 79.7325ms
-   running Model::forward ... took 77.5851ms
-   running Model::backward ... took 83.218ms
-   storing model output in LAMMPS data structures ... took 0.062749ms
-PairMetatensor::compute took 260.335ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.87825ms
-         creating samples Labels (287505 pairs) ... took 21.3125ms
-         moving neighbor data to dtype/device ... took 44.8691ms
-         creating neighbors TensorBlock ... took 0.158792ms
-      converting neighbors without ghosts remapping took 70.3504ms
-   creating System from LAMMPS data took 74.1946ms
-   running Model::forward ... took 176.959ms
-   running Model::backward ... took 85.9972ms
-   storing model output in LAMMPS data structures ... took 0.066223ms
-PairMetatensor::compute took 358.545ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.98879ms
-         creating samples Labels (287516 pairs) ... took 25.7911ms
-         moving neighbor data to dtype/device ... took 43.0523ms
-         creating neighbors TensorBlock ... took 0.179211ms
-      converting neighbors without ghosts remapping took 73.1407ms
-   creating System from LAMMPS data took 76.9685ms
-   running Model::forward ... took 0.796504ms
-   running Model::backward ... took 5.46691ms
-   storing model output in LAMMPS data structures ... took 0.0573ms
-PairMetatensor::compute took 103.437ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.85184ms
-         creating samples Labels (287537 pairs) ... took 25.9099ms
-         moving neighbor data to dtype/device ... took 44.8881ms
-         creating neighbors TensorBlock ... took 0.144284ms
-      converting neighbors without ghosts remapping took 74.9193ms
-   creating System from LAMMPS data took 78.7224ms
-   running Model::forward ... took 0.796368ms
-   running Model::backward ... took 4.41124ms
-   storing model output in LAMMPS data structures ... took 0.037555ms
-PairMetatensor::compute took 104.07ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.94243ms
-         creating samples Labels (287557 pairs) ... took 27.4788ms
-         moving neighbor data to dtype/device ... took 43.7007ms
-         creating neighbors TensorBlock ... took 0.149797ms
-      converting neighbors without ghosts remapping took 76.4321ms
-   creating System from LAMMPS data took 80.292ms
-   running Model::forward ... took 0.786151ms
-   running Model::backward ... took 4.48245ms
-   storing model output in LAMMPS data structures ... took 0.056347ms
-PairMetatensor::compute took 106.904ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.86605ms
-         creating samples Labels (287595 pairs) ... took 24.2911ms
-         moving neighbor data to dtype/device ... took 42.1352ms
-         creating neighbors TensorBlock ... took 0.132483ms
-      converting neighbors without ghosts remapping took 70.5209ms
-   creating System from LAMMPS data took 74.3363ms
-   running Model::forward ... took 0.794169ms
-   running Model::backward ... took 5.45675ms
-   storing model output in LAMMPS data structures ... took 0.056627ms
-PairMetatensor::compute took 102.031ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.86863ms
-         creating samples Labels (287636 pairs) ... took 24.6937ms
-         moving neighbor data to dtype/device ... took 42.6833ms
-         creating neighbors TensorBlock ... took 0.140299ms
-      converting neighbors without ghosts remapping took 71.514ms
-   creating System from LAMMPS data took 75.3745ms
-   running Model::forward ... took 0.796606ms
-   running Model::backward ... took 5.46897ms
-   storing model output in LAMMPS data structures ... took 0.057387ms
-PairMetatensor::compute took 103.604ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.0149ms
-         creating samples Labels (287687 pairs) ... took 30.3688ms
-         moving neighbor data to dtype/device ... took 47.8568ms
-         creating neighbors TensorBlock ... took 0.171662ms
-      converting neighbors without ghosts remapping took 82.5799ms
-   creating System from LAMMPS data took 86.4301ms
-   running Model::forward ... took 0.814056ms
-   running Model::backward ... took 5.48045ms
-   storing model output in LAMMPS data structures ... took 0.057549ms
-PairMetatensor::compute took 113.786ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.93499ms
-         creating samples Labels (287777 pairs) ... took 25.0759ms
-         moving neighbor data to dtype/device ... took 42.6198ms
-         creating neighbors TensorBlock ... took 0.145933ms
-      converting neighbors without ghosts remapping took 71.8792ms
-   creating System from LAMMPS data took 75.706ms
-   running Model::forward ... took 0.793044ms
-   running Model::backward ... took 4.46085ms
-   storing model output in LAMMPS data structures ... took 0.056909ms
-PairMetatensor::compute took 102.163ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.8423ms
-         creating samples Labels (287896 pairs) ... took 25.1011ms
-         moving neighbor data to dtype/device ... took 42.3414ms
-         creating neighbors TensorBlock ... took 0.137188ms
-      converting neighbors without ghosts remapping took 71.5242ms
-   creating System from LAMMPS data took 75.3401ms
-   running Model::forward ... took 0.785898ms
-   running Model::backward ... took 4.44117ms
-   storing model output in LAMMPS data structures ... took 0.056591ms
-PairMetatensor::compute took 101.976ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.94416ms
-         creating samples Labels (287958 pairs) ... took 24.3388ms
-         moving neighbor data to dtype/device ... took 42.0271ms
-         creating neighbors TensorBlock ... took 0.144518ms
-      converting neighbors without ghosts remapping took 70.5601ms
-   creating System from LAMMPS data took 74.3993ms
-   running Model::forward ... took 0.840508ms
-   running Model::backward ... took 4.42589ms
-   storing model output in LAMMPS data structures ... took 0.058294ms
-PairMetatensor::compute took 101.098ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.91144ms
-         creating samples Labels (288069 pairs) ... took 24.7845ms
-         moving neighbor data to dtype/device ... took 42.2546ms
-         creating neighbors TensorBlock ... took 0.152261ms
-      converting neighbors without ghosts remapping took 71.2304ms
-   creating System from LAMMPS data took 75.0196ms
-   running Model::forward ... took 0.797094ms
-   running Model::backward ... took 5.46408ms
-   storing model output in LAMMPS data structures ... took 0.056855ms
-PairMetatensor::compute took 101.771ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.881ms
-         creating samples Labels (288176 pairs) ... took 25.8646ms
-         moving neighbor data to dtype/device ... took 42.9685ms
-         creating neighbors TensorBlock ... took 0.148208ms
-      converting neighbors without ghosts remapping took 72.9953ms
-   creating System from LAMMPS data took 76.7733ms
-   running Model::forward ... took 0.815417ms
-   running Model::backward ... took 4.56774ms
-   storing model output in LAMMPS data structures ... took 0.057017ms
-PairMetatensor::compute took 103.805ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.10785ms
-         creating samples Labels (288330 pairs) ... took 26.4028ms
-         moving neighbor data to dtype/device ... took 47.7583ms
-         creating neighbors TensorBlock ... took 0.167695ms
-      converting neighbors without ghosts remapping took 78.5742ms
-   creating System from LAMMPS data took 82.2877ms
-   running Model::forward ... took 0.828349ms
-   running Model::backward ... took 5.24852ms
-   storing model output in LAMMPS data structures ... took 0.046771ms
-PairMetatensor::compute took 108.888ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.8536ms
-         creating samples Labels (288474 pairs) ... took 25.3589ms
-         moving neighbor data to dtype/device ... took 42.7507ms
-         creating neighbors TensorBlock ... took 0.147038ms
-      converting neighbors without ghosts remapping took 72.2234ms
-   creating System from LAMMPS data took 76.1239ms
-   running Model::forward ... took 0.796708ms
-   running Model::backward ... took 4.29603ms
-   storing model output in LAMMPS data structures ... took 0.045503ms
-PairMetatensor::compute took 101.346ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.86989ms
-         creating samples Labels (288678 pairs) ... took 25.0448ms
-         moving neighbor data to dtype/device ... took 43.4583ms
-         creating neighbors TensorBlock ... took 0.128371ms
-      converting neighbors without ghosts remapping took 72.6046ms
-   creating System from LAMMPS data took 76.4345ms
-   running Model::forward ... took 0.81739ms
-   running Model::backward ... took 5.28755ms
-   storing model output in LAMMPS data structures ... took 0.047229ms
-PairMetatensor::compute took 103.19ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.94098ms
-         creating samples Labels (288844 pairs) ... took 24.8531ms
-         moving neighbor data to dtype/device ... took 42.6709ms
-         creating neighbors TensorBlock ... took 0.1316ms
-      converting neighbors without ghosts remapping took 71.6905ms
-   creating System from LAMMPS data took 75.4517ms
-   running Model::forward ... took 0.802039ms
-   running Model::backward ... took 5.31365ms
-   storing model output in LAMMPS data structures ... took 0.046767ms
-PairMetatensor::compute took 102.01ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.07971ms
-         creating samples Labels (289027 pairs) ... took 25.4189ms
-         moving neighbor data to dtype/device ... took 42.4057ms
-         creating neighbors TensorBlock ... took 0.146186ms
-      converting neighbors without ghosts remapping took 72.2031ms
-   creating System from LAMMPS data took 75.9867ms
-   running Model::forward ... took 0.796918ms
-   running Model::backward ... took 5.29274ms
-   storing model output in LAMMPS data structures ... took 0.04836ms
-PairMetatensor::compute took 102.418ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.16418ms
-         creating samples Labels (289221 pairs) ... took 24.4701ms
-         moving neighbor data to dtype/device ... took 42.9573ms
-         creating neighbors TensorBlock ... took 0.127983ms
-      converting neighbors without ghosts remapping took 71.839ms
-   creating System from LAMMPS data took 75.7468ms
-   running Model::forward ... took 0.804939ms
-   running Model::backward ... took 4.34084ms
-   storing model output in LAMMPS data structures ... took 0.054098ms
-PairMetatensor::compute took 101.649ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.97463ms
-         creating samples Labels (289425 pairs) ... took 25.1675ms
-         moving neighbor data to dtype/device ... took 42.3033ms
-         creating neighbors TensorBlock ... took 0.130301ms
-      converting neighbors without ghosts remapping took 71.6757ms
-   creating System from LAMMPS data took 75.4759ms
-   running Model::forward ... took 0.801932ms
-   running Model::backward ... took 5.2958ms
-   storing model output in LAMMPS data structures ... took 0.047057ms
-PairMetatensor::compute took 101.711ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.9883ms
-         creating samples Labels (289704 pairs) ... took 25.1155ms
-         moving neighbor data to dtype/device ... took 43.0204ms
-         creating neighbors TensorBlock ... took 0.125327ms
-      converting neighbors without ghosts remapping took 72.3464ms
-   creating System from LAMMPS data took 76.1621ms
-   running Model::forward ... took 0.792195ms
-   running Model::backward ... took 5.2859ms
-   storing model output in LAMMPS data structures ... took 0.050063ms
-PairMetatensor::compute took 102.437ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.00214ms
-         creating samples Labels (289967 pairs) ... took 25.1019ms
-         moving neighbor data to dtype/device ... took 42.5831ms
-         creating neighbors TensorBlock ... took 0.124166ms
-      converting neighbors without ghosts remapping took 71.9027ms
-   creating System from LAMMPS data took 75.7085ms
-   running Model::forward ... took 0.792045ms
-   running Model::backward ... took 5.32339ms
-   storing model output in LAMMPS data structures ... took 0.048596ms
-PairMetatensor::compute took 102.289ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.98781ms
-         creating samples Labels (290287 pairs) ... took 25.7117ms
-         moving neighbor data to dtype/device ... took 42.9473ms
-         creating neighbors TensorBlock ... took 0.118888ms
-      converting neighbors without ghosts remapping took 72.8545ms
-   creating System from LAMMPS data took 76.6491ms
-   running Model::forward ... took 0.79426ms
-   running Model::backward ... took 4.36788ms
-   storing model output in LAMMPS data structures ... took 0.049178ms
-PairMetatensor::compute took 104.041ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.99266ms
-         creating samples Labels (290581 pairs) ... took 25.3113ms
-         moving neighbor data to dtype/device ... took 43.8251ms
-         creating neighbors TensorBlock ... took 0.127251ms
-      converting neighbors without ghosts remapping took 73.3516ms
-   creating System from LAMMPS data took 77.2188ms
-   running Model::forward ... took 0.811232ms
-   running Model::backward ... took 5.35707ms
-   storing model output in LAMMPS data structures ... took 0.048821ms
-PairMetatensor::compute took 103.735ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.0835ms
-         creating samples Labels (290886 pairs) ... took 25.093ms
-         moving neighbor data to dtype/device ... took 42.4797ms
-         creating neighbors TensorBlock ... took 0.121996ms
-      converting neighbors without ghosts remapping took 71.8705ms
-   creating System from LAMMPS data took 75.6702ms
-   running Model::forward ... took 0.819144ms
-   running Model::backward ... took 5.34722ms
-   storing model output in LAMMPS data structures ... took 0.047607ms
-PairMetatensor::compute took 102.176ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.01827ms
-         creating samples Labels (291241 pairs) ... took 24.6563ms
-         moving neighbor data to dtype/device ... took 43.0411ms
-         creating neighbors TensorBlock ... took 0.119021ms
-      converting neighbors without ghosts remapping took 71.9252ms
-   creating System from LAMMPS data took 75.7188ms
-   running Model::forward ... took 0.814068ms
-   running Model::backward ... took 5.34419ms
-   storing model output in LAMMPS data structures ... took 0.047731ms
-PairMetatensor::compute took 102.234ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.97971ms
-         creating samples Labels (291542 pairs) ... took 26.4914ms
-         moving neighbor data to dtype/device ... took 42.7634ms
-         creating neighbors TensorBlock ... took 0.171579ms
-      converting neighbors without ghosts remapping took 73.564ms
-   creating System from LAMMPS data took 77.3481ms
-   running Model::forward ... took 0.813763ms
-   running Model::backward ... took 4.52802ms
-   storing model output in LAMMPS data structures ... took 0.059323ms
-PairMetatensor::compute took 104.036ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.95989ms
-         creating samples Labels (291870 pairs) ... took 25.0878ms
-         moving neighbor data to dtype/device ... took 43.3677ms
-         creating neighbors TensorBlock ... took 0.13649ms
-      converting neighbors without ghosts remapping took 72.6612ms
-   creating System from LAMMPS data took 76.4539ms
-   running Model::forward ... took 0.813403ms
-   running Model::backward ... took 5.69688ms
-   storing model output in LAMMPS data structures ... took 0.091004ms
-PairMetatensor::compute took 104.832ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.24265ms
-         creating samples Labels (292210 pairs) ... took 25.022ms
-         moving neighbor data to dtype/device ... took 43.0857ms
-         creating neighbors TensorBlock ... took 0.136683ms
-      converting neighbors without ghosts remapping took 72.593ms
-   creating System from LAMMPS data took 76.3843ms
-   running Model::forward ... took 0.793138ms
-   running Model::backward ... took 4.74492ms
-   storing model output in LAMMPS data structures ... took 0.091383ms
-PairMetatensor::compute took 103.574ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.96934ms
-         creating samples Labels (292564 pairs) ... took 24.878ms
-         moving neighbor data to dtype/device ... took 43.036ms
-         creating neighbors TensorBlock ... took 0.134395ms
-      converting neighbors without ghosts remapping took 72.1177ms
-   creating System from LAMMPS data took 75.892ms
-   running Model::forward ... took 0.799989ms
-   running Model::backward ... took 4.77885ms
-   storing model output in LAMMPS data structures ... took 0.093422ms
-PairMetatensor::compute took 103.114ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 3.99093ms
-         creating samples Labels (292906 pairs) ... took 25.5402ms
-         moving neighbor data to dtype/device ... took 43.2155ms
-         creating neighbors TensorBlock ... took 0.13094ms
-      converting neighbors without ghosts remapping took 72.9793ms
-   creating System from LAMMPS data took 76.7716ms
-   running Model::forward ... took 0.791921ms
-   running Model::backward ... took 5.75108ms
-   storing model output in LAMMPS data structures ... took 0.091747ms
-PairMetatensor::compute took 104.958ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.17409ms
-         creating samples Labels (293304 pairs) ... took 25.5659ms
-         moving neighbor data to dtype/device ... took 43.0677ms
-         creating neighbors TensorBlock ... took 0.142192ms
-      converting neighbors without ghosts remapping took 73.0583ms
-   creating System from LAMMPS data took 76.8456ms
-   running Model::forward ... took 0.79383ms
-   running Model::backward ... took 4.73772ms
-   storing model output in LAMMPS data structures ... took 0.059033ms
-PairMetatensor::compute took 104.285ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.47246ms
-         creating samples Labels (293615 pairs) ... took 26.963ms
-         moving neighbor data to dtype/device ... took 47.174ms
-         creating neighbors TensorBlock ... took 0.171036ms
-      converting neighbors without ghosts remapping took 78.9197ms
-   creating System from LAMMPS data took 82.6835ms
-   running Model::forward ... took 0.800391ms
-   running Model::backward ... took 4.52238ms
-   storing model output in LAMMPS data structures ... took 0.063791ms
-PairMetatensor::compute took 110.162ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.29547ms
-         creating samples Labels (293939 pairs) ... took 26.9884ms
-         moving neighbor data to dtype/device ... took 46.889ms
-         creating neighbors TensorBlock ... took 0.118789ms
-      converting neighbors without ghosts remapping took 78.3778ms
-   creating System from LAMMPS data took 82.1475ms
-   running Model::forward ... took 0.831121ms
-   running Model::backward ... took 5.58461ms
-   storing model output in LAMMPS data structures ... took 0.058439ms
-PairMetatensor::compute took 110.362ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.46633ms
-         creating samples Labels (294384 pairs) ... took 27.0357ms
-         moving neighbor data to dtype/device ... took 46.0578ms
-         creating neighbors TensorBlock ... took 0.118081ms
-      converting neighbors without ghosts remapping took 77.7843ms
-   creating System from LAMMPS data took 81.557ms
-   running Model::forward ... took 0.818014ms
-   running Model::backward ... took 4.5549ms
-   storing model output in LAMMPS data structures ... took 0.059593ms
-PairMetatensor::compute took 108.802ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.32953ms
-         creating samples Labels (294730 pairs) ... took 27.4958ms
-         moving neighbor data to dtype/device ... took 46.6081ms
-         creating neighbors TensorBlock ... took 0.119943ms
-      converting neighbors without ghosts remapping took 78.664ms
-   creating System from LAMMPS data took 82.4838ms
-   running Model::forward ... took 0.822318ms
-   running Model::backward ... took 4.58338ms
-   storing model output in LAMMPS data structures ... took 0.05651ms
-PairMetatensor::compute took 109.732ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.37008ms
-         creating samples Labels (295128 pairs) ... took 25.3038ms
-         moving neighbor data to dtype/device ... took 44.1865ms
-         creating neighbors TensorBlock ... took 0.12486ms
-      converting neighbors without ghosts remapping took 74.0953ms
-   creating System from LAMMPS data took 77.8753ms
-   running Model::forward ... took 0.81465ms
-   running Model::backward ... took 5.5608ms
-   storing model output in LAMMPS data structures ... took 0.057318ms
-PairMetatensor::compute took 106.207ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.53388ms
-         creating samples Labels (295493 pairs) ... took 27.951ms
-         moving neighbor data to dtype/device ... took 46.5945ms
-         creating neighbors TensorBlock ... took 0.118681ms
-      converting neighbors without ghosts remapping took 79.3018ms
-   creating System from LAMMPS data took 83.0563ms
-   running Model::forward ... took 0.844475ms
-   running Model::backward ... took 4.60739ms
-   storing model output in LAMMPS data structures ... took 0.057325ms
-PairMetatensor::compute took 110.277ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.42942ms
-         creating samples Labels (295884 pairs) ... took 32.9889ms
-         moving neighbor data to dtype/device ... took 44.0055ms
-         creating neighbors TensorBlock ... took 0.196886ms
-      converting neighbors without ghosts remapping took 81.7571ms
-   creating System from LAMMPS data took 85.5441ms
-   running Model::forward ... took 0.841437ms
-   running Model::backward ... took 4.58532ms
-   storing model output in LAMMPS data structures ... took 0.05781ms
-PairMetatensor::compute took 113.075ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.14909ms
-         creating samples Labels (296299 pairs) ... took 28.1697ms
-         moving neighbor data to dtype/device ... took 44.354ms
-         creating neighbors TensorBlock ... took 0.167661ms
-      converting neighbors without ghosts remapping took 76.9825ms
-   creating System from LAMMPS data took 80.7883ms
-   running Model::forward ... took 0.834223ms
-   running Model::backward ... took 5.61081ms
-   storing model output in LAMMPS data structures ... took 0.056749ms
-PairMetatensor::compute took 109.039ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.11184ms
-         creating samples Labels (296691 pairs) ... took 28.298ms
-         moving neighbor data to dtype/device ... took 43.5374ms
-         creating neighbors TensorBlock ... took 0.137286ms
-      converting neighbors without ghosts remapping took 76.1969ms
-   creating System from LAMMPS data took 79.9782ms
-   running Model::forward ... took 0.865365ms
-   running Model::backward ... took 5.57801ms
-   storing model output in LAMMPS data structures ... took 0.056578ms
-PairMetatensor::compute took 108.461ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.13337ms
-         creating samples Labels (297135 pairs) ... took 26.1319ms
-         moving neighbor data to dtype/device ... took 46.508ms
-         creating neighbors TensorBlock ... took 0.14884ms
-      converting neighbors without ghosts remapping took 77.0658ms
-   creating System from LAMMPS data took 80.8101ms
-   running Model::forward ... took 0.831432ms
-   running Model::backward ... took 5.6335ms
-   storing model output in LAMMPS data structures ... took 0.058471ms
-PairMetatensor::compute took 109.541ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.40323ms
-         creating samples Labels (297532 pairs) ... took 28.7591ms
-         moving neighbor data to dtype/device ... took 45.2004ms
-         creating neighbors TensorBlock ... took 0.146514ms
-      converting neighbors without ghosts remapping took 78.7184ms
-   creating System from LAMMPS data took 82.478ms
-   running Model::forward ... took 0.828291ms
-   running Model::backward ... took 5.62932ms
-   storing model output in LAMMPS data structures ... took 0.058569ms
-PairMetatensor::compute took 110.93ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.47454ms
-         creating samples Labels (297903 pairs) ... took 25.8619ms
-         moving neighbor data to dtype/device ... took 44.0263ms
-         creating neighbors TensorBlock ... took 0.132597ms
-      converting neighbors without ghosts remapping took 74.5869ms
-   creating System from LAMMPS data took 78.3184ms
-   running Model::forward ... took 0.845786ms
-   running Model::backward ... took 5.62956ms
-   storing model output in LAMMPS data structures ... took 0.056014ms
-PairMetatensor::compute took 106.648ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.10345ms
-         creating samples Labels (298352 pairs) ... took 25.8355ms
-         moving neighbor data to dtype/device ... took 44.6317ms
-         creating neighbors TensorBlock ... took 0.126878ms
-      converting neighbors without ghosts remapping took 74.7952ms
-   creating System from LAMMPS data took 78.5922ms
-   running Model::forward ... took 0.841141ms
-   running Model::backward ... took 5.62008ms
-   storing model output in LAMMPS data structures ... took 0.058785ms
-PairMetatensor::compute took 107.218ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.34407ms
-         creating samples Labels (298778 pairs) ... took 25.9249ms
-         moving neighbor data to dtype/device ... took 44.0698ms
-         creating neighbors TensorBlock ... took 0.132797ms
-      converting neighbors without ghosts remapping took 74.565ms
-   creating System from LAMMPS data took 78.2997ms
-   running Model::forward ... took 0.836468ms
-   running Model::backward ... took 5.65783ms
-   storing model output in LAMMPS data structures ... took 0.06558ms
-PairMetatensor::compute took 107.012ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.26671ms
-         creating samples Labels (299186 pairs) ... took 25.5114ms
-         moving neighbor data to dtype/device ... took 45.3726ms
-         creating neighbors TensorBlock ... took 0.146522ms
-      converting neighbors without ghosts remapping took 75.4216ms
-   creating System from LAMMPS data took 79.2028ms
-   running Model::forward ... took 0.831292ms
-   running Model::backward ... took 5.65998ms
-   storing model output in LAMMPS data structures ... took 0.057471ms
-PairMetatensor::compute took 107.858ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.26043ms
-         creating samples Labels (299630 pairs) ... took 25.7133ms
-         moving neighbor data to dtype/device ... took 44.2985ms
-         creating neighbors TensorBlock ... took 0.139194ms
-      converting neighbors without ghosts remapping took 74.5063ms
-   creating System from LAMMPS data took 78.2937ms
-   running Model::forward ... took 0.848767ms
-   running Model::backward ... took 5.55194ms
-   storing model output in LAMMPS data structures ... took 0.070877ms
-PairMetatensor::compute took 107.014ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.22782ms
-         creating samples Labels (300081 pairs) ... took 25.5103ms
-         moving neighbor data to dtype/device ... took 45.0771ms
-         creating neighbors TensorBlock ... took 0.133913ms
-      converting neighbors without ghosts remapping took 75.0436ms
-   creating System from LAMMPS data took 78.8208ms
-   running Model::forward ... took 0.840829ms
-   running Model::backward ... took 4.64547ms
-   storing model output in LAMMPS data structures ... took 0.058274ms
-PairMetatensor::compute took 106.422ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.26752ms
-         creating samples Labels (300351 pairs) ... took 26.3499ms
-         moving neighbor data to dtype/device ... took 44.4433ms
-         creating neighbors TensorBlock ... took 0.129019ms
-      converting neighbors without ghosts remapping took 75.2921ms
-   creating System from LAMMPS data took 79.0846ms
-   running Model::forward ... took 0.836149ms
-   running Model::backward ... took 4.6571ms
-   storing model output in LAMMPS data structures ... took 0.058405ms
-PairMetatensor::compute took 105.853ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.09266ms
-         creating samples Labels (300725 pairs) ... took 26.2519ms
-         moving neighbor data to dtype/device ... took 44.6107ms
-         creating neighbors TensorBlock ... took 0.130586ms
-      converting neighbors without ghosts remapping took 75.1803ms
-   creating System from LAMMPS data took 79.0191ms
-   running Model::forward ... took 0.811081ms
-   running Model::backward ... took 4.64008ms
-   storing model output in LAMMPS data structures ... took 0.060501ms
-PairMetatensor::compute took 106.259ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.23228ms
-         creating samples Labels (301239 pairs) ... took 25.9451ms
-         moving neighbor data to dtype/device ... took 45.4106ms
-         creating neighbors TensorBlock ... took 0.139287ms
-      converting neighbors without ghosts remapping took 75.8538ms
-   creating System from LAMMPS data took 79.7576ms
-   running Model::forward ... took 0.838543ms
-   running Model::backward ... took 4.6372ms
-   storing model output in LAMMPS data structures ... took 0.059043ms
-PairMetatensor::compute took 107.613ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.51078ms
-         creating samples Labels (301663 pairs) ... took 25.7448ms
-         moving neighbor data to dtype/device ... took 44.8206ms
-         creating neighbors TensorBlock ... took 0.130675ms
-      converting neighbors without ghosts remapping took 75.3028ms
-   creating System from LAMMPS data took 79.1174ms
-   running Model::forward ... took 0.827499ms
-   running Model::backward ... took 5.69755ms
-   storing model output in LAMMPS data structures ... took 0.058519ms
-PairMetatensor::compute took 108.023ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.31447ms
-         creating samples Labels (302128 pairs) ... took 26.3658ms
-         moving neighbor data to dtype/device ... took 44.9515ms
-         creating neighbors TensorBlock ... took 0.131688ms
-      converting neighbors without ghosts remapping took 75.8597ms
-   creating System from LAMMPS data took 79.6698ms
-   running Model::forward ... took 0.838657ms
-   running Model::backward ... took 5.7ms
-   storing model output in LAMMPS data structures ... took 0.063636ms
-PairMetatensor::compute took 108.726ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.13698ms
-         creating samples Labels (302435 pairs) ... took 26.2556ms
-         moving neighbor data to dtype/device ... took 44.792ms
-         creating neighbors TensorBlock ... took 0.138658ms
-      converting neighbors without ghosts remapping took 75.4175ms
-   creating System from LAMMPS data took 79.1884ms
-   running Model::forward ... took 0.811625ms
-   running Model::backward ... took 4.59132ms
-   storing model output in LAMMPS data structures ... took 0.059576ms
-PairMetatensor::compute took 106.986ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.34997ms
-         creating samples Labels (302854 pairs) ... took 26.4064ms
-         moving neighbor data to dtype/device ... took 45.3941ms
-         creating neighbors TensorBlock ... took 0.159971ms
-      converting neighbors without ghosts remapping took 76.4253ms
-   creating System from LAMMPS data took 80.1712ms
-   running Model::forward ... took 0.817105ms
-   running Model::backward ... took 4.65088ms
-   storing model output in LAMMPS data structures ... took 0.056776ms
-PairMetatensor::compute took 108.079ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.30485ms
-         creating samples Labels (303253 pairs) ... took 26.773ms
-         moving neighbor data to dtype/device ... took 45.9914ms
-         creating neighbors TensorBlock ... took 0.145932ms
-      converting neighbors without ghosts remapping took 77.3438ms
-   creating System from LAMMPS data took 81.1409ms
-   running Model::forward ... took 0.809335ms
-   running Model::backward ... took 4.63747ms
-   storing model output in LAMMPS data structures ... took 0.056778ms
-PairMetatensor::compute took 109.009ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.28401ms
-         creating samples Labels (303629 pairs) ... took 25.462ms
-         moving neighbor data to dtype/device ... took 45.2866ms
-         creating neighbors TensorBlock ... took 0.133442ms
-      converting neighbors without ghosts remapping took 75.2645ms
-   creating System from LAMMPS data took 79.0295ms
-   running Model::forward ... took 0.82075ms
-   running Model::backward ... took 4.57621ms
-   storing model output in LAMMPS data structures ... took 0.058918ms
-PairMetatensor::compute took 107.623ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.43525ms
-         creating samples Labels (304060 pairs) ... took 25.4936ms
-         moving neighbor data to dtype/device ... took 45.1384ms
-         creating neighbors TensorBlock ... took 0.139289ms
-      converting neighbors without ghosts remapping took 75.3064ms
-   creating System from LAMMPS data took 79.0662ms
-   running Model::forward ... took 0.814706ms
-   running Model::backward ... took 4.64071ms
-   storing model output in LAMMPS data structures ... took 0.056767ms
-PairMetatensor::compute took 106.888ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.3233ms
-         creating samples Labels (304441 pairs) ... took 25.5078ms
-         moving neighbor data to dtype/device ... took 45.0776ms
-         creating neighbors TensorBlock ... took 0.135219ms
-      converting neighbors without ghosts remapping took 75.1394ms
-   creating System from LAMMPS data took 78.9663ms
-   running Model::forward ... took 0.809689ms
-   running Model::backward ... took 4.66938ms
-   storing model output in LAMMPS data structures ... took 0.057817ms
-PairMetatensor::compute took 106.891ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.31935ms
-         creating samples Labels (304791 pairs) ... took 26.1111ms
-         moving neighbor data to dtype/device ... took 47.914ms
-         creating neighbors TensorBlock ... took 0.130429ms
-      converting neighbors without ghosts remapping took 78.5677ms
-   creating System from LAMMPS data took 82.3575ms
-   running Model::forward ... took 0.812575ms
-   running Model::backward ... took 5.70302ms
-   storing model output in LAMMPS data structures ... took 0.058493ms
-PairMetatensor::compute took 111.347ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.42223ms
-         creating samples Labels (305069 pairs) ... took 25.5796ms
-         moving neighbor data to dtype/device ... took 46.0771ms
-         creating neighbors TensorBlock ... took 0.140139ms
-      converting neighbors without ghosts remapping took 76.3158ms
-   creating System from LAMMPS data took 80.0588ms
-   running Model::forward ... took 0.812627ms
-   running Model::backward ... took 5.72019ms
-   storing model output in LAMMPS data structures ... took 0.057552ms
-PairMetatensor::compute took 109.303ms
-
-PairMetatensor::compute ...
-   creating System from LAMMPS data ...
-      converting neighbors without ghosts remapping ...
-         filtering LAMMPS neighbor list ... took 4.32084ms
-         creating samples Labels (305457 pairs) ... took 26.8623ms
-         moving neighbor data to dtype/device ... took 46.7501ms
-         creating neighbors TensorBlock ... took 0.131824ms
-      converting neighbors without ghosts remapping took 78.1689ms
-   creating System from LAMMPS data took 81.9768ms
-   running Model::forward ... took 0.818184ms
-   running Model::backward ... took 4.7001ms
-   storing model output in LAMMPS data structures ... took 0.087176ms
-PairMetatensor::compute took 110.126ms
\ No newline at end of file

From 685b8d60b124dd7b40b96854d72ec8a4635e452e Mon Sep 17 00:00:00 2001
From: Filippo Bigi <98903385+frostedoyster@users.noreply.github.com>
Date: Wed, 30 Oct 2024 17:58:01 +0100
Subject: [PATCH 09/15] Update src/KOKKOS/metatensor_system_kokkos.cpp

Co-authored-by: Guillaume Fraux <luthaf@luthaf.fr>
---
 src/KOKKOS/metatensor_system_kokkos.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index bbe7bfe98fd..3da8f44b9c2 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -56,9 +56,9 @@ MetatensorSystemAdaptorKokkos<DeviceType>::MetatensorSystemAdaptorKokkos(LAMMPS
     request->set_id(0);
     request->set_cutoff(options_.interaction_range);
     // set whether the kokkos NL should be calculated on host or device
-    request->set_kokkos_host(std::is_same_v<DeviceType,LMPHostType> &&
-                            !std::is_same_v<DeviceType,LMPDeviceType>);
-    request->set_kokkos_device(std::is_same_v<DeviceType,LMPDeviceType>);
+    request->set_kokkos_host(std::is_same_v<DeviceType, LMPHostType> &&
+                            !std::is_same_v<DeviceType, LMPDeviceType>);
+    request->set_kokkos_device(std::is_same_v<DeviceType, LMPDeviceType>);
 }
 
 template<class DeviceType>

From 28b741cc74dd05d53d69b7bc2fb6811f41c56db2 Mon Sep 17 00:00:00 2001
From: Filippo Bigi <98903385+frostedoyster@users.noreply.github.com>
Date: Wed, 30 Oct 2024 18:01:14 +0100
Subject: [PATCH 10/15] Update src/KOKKOS/metatensor_system_kokkos.cpp

Co-authored-by: Guillaume Fraux <luthaf@luthaf.fr>
---
 src/KOKKOS/metatensor_system_kokkos.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index 3da8f44b9c2..417184e40b9 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -174,7 +174,7 @@ void MetatensorSystemAdaptorKokkos<DeviceType>::setup_neighbors_remap(metatensor
     auto max_number_of_neighbors = list_kk->maxneighs;
 
     // mask neighbors_kk with NEIGHMASK. Torch doesn't have this functionality, we do it in Kokkos
-    Kokkos::View<int**, Kokkos::LayoutRight, DeviceType> neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors);
+    Kokkos::View<int32_t**, Kokkos::LayoutRight, DeviceType> neighbors_kk_masked("neighbors_kk_masked", total_n_atoms, max_number_of_neighbors);
     Kokkos::parallel_for("mask_neigh", total_n_atoms*max_number_of_neighbors, KOKKOS_LAMBDA(int i) {
         auto local_i = i / max_number_of_neighbors;
         auto local_j = i % max_number_of_neighbors;

From 3407b81626b5e34e83151f305187651876519f7f Mon Sep 17 00:00:00 2001
From: Filippo Bigi <98903385+frostedoyster@users.noreply.github.com>
Date: Wed, 30 Oct 2024 18:03:37 +0100
Subject: [PATCH 11/15] Update src/KOKKOS/metatensor_system_kokkos.cpp

Co-authored-by: Guillaume Fraux <luthaf@luthaf.fr>
---
 src/KOKKOS/metatensor_system_kokkos.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index 417184e40b9..8eccab9c71c 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -325,7 +325,7 @@ void MetatensorSystemAdaptorKokkos<DeviceType>::setup_neighbors_remap(metatensor
 
 template<class DeviceType>
 void MetatensorSystemAdaptorKokkos<DeviceType>::setup_neighbors_no_remap(metatensor_torch::System& system) {
-    throw std::runtime_error("The metatensor/kk requires remap_pairs to be true");
+    throw std::runtime_error("the kokkos version of metatensor requires remap_pairs to be true");
 }
 
 
From b6568d8c7e1be1461697374253c589cbf3442423 Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Wed, 30 Oct 2024 18:13:30 +0100
Subject: [PATCH 12/15] Undo unintended changes

---
 cmake/Modules/Packages/ML-METATENSOR.cmake |  16 ++++++++--------
 examples/PACKAGES/metatensor/in.metatensor |  12 ++++++------
 examples/PACKAGES/metatensor/nickel-lj.pt  | Bin 37732 -> 30459 bytes
 3 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/cmake/Modules/Packages/ML-METATENSOR.cmake b/cmake/Modules/Packages/ML-METATENSOR.cmake
index 92e050614b8..294d30af26e 100644
--- a/cmake/Modules/Packages/ML-METATENSOR.cmake
+++ b/cmake/Modules/Packages/ML-METATENSOR.cmake
@@ -4,14 +4,14 @@ if(CMAKE_CXX_STANDARD LESS 17)
 be set to at least C++17")
 endif()
 
-# if (BUILD_OMP AND APPLE)
-#     message(FATAL_ERROR
-#         "Can not enable both BUILD_OMP and PGK_ML-METATENSOR on Apple systems, "
-#         "since this results in two different versions of libiomp5.dylib (one "
-#         "from the system and one from Torch) being linked to the final "
-#         "executable, which then segfaults"
-#     )
-# endif()
+if (BUILD_OMP AND APPLE)
+    message(FATAL_ERROR
+        "Can not enable both BUILD_OMP and PGK_ML-METATENSOR on Apple systems, "
+        "since this results in two different versions of libiomp5.dylib (one "
+        "from the system and one from Torch) being linked to the final "
+        "executable, which then segfaults"
+    )
+endif()
 
 # Bring the `torch` target in scope to allow evaluation
 # of cmake generator expression from `metatensor_torch`
diff --git a/examples/PACKAGES/metatensor/in.metatensor b/examples/PACKAGES/metatensor/in.metatensor
index 9b93563a5c9..59a32c89e4a 100644
--- a/examples/PACKAGES/metatensor/in.metatensor
+++ b/examples/PACKAGES/metatensor/in.metatensor
@@ -3,7 +3,7 @@ boundary p p p
 
 atom_style atomic
 lattice fcc 3.6
-region box block 0 8 0 8 0 8
+region box block 0 2 0 2 0 2
 create_box 1 box
 create_atoms 1 box
 
@@ -12,16 +12,16 @@ mass Ni 58.693
 
 velocity all create 123 42
 
-pair_style metatensor nickel-lj.pt device cuda remap_pairs off
+pair_style metatensor nickel-lj.pt
 # pair_style metatensor nickel-lj-extensions.pt extensions collected-extensions/
 pair_coeff * * 28
 
 timestep 0.001
-fix 1 all nve
+fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0
 
-thermo 100
-thermo_style custom step temp pe etotal press vol cpu
+thermo 10
+thermo_style custom step temp pe etotal press vol
 
 # dump 1 all atom 10 dump.metatensor
 
-run 1000
+run 100
diff --git a/examples/PACKAGES/metatensor/nickel-lj.pt b/examples/PACKAGES/metatensor/nickel-lj.pt
index 23f5c393b1a8d8899edc1394945a838338eed94b..7128011161a30f38562c620ece460d09482b42d6 100644
GIT binary patch
delta 23652
zcmYiNb8sch^EM8j*x1;%ZQItyHcsrE*tV06ZEkGaw!QHt+5GO$^L_t#r)p|yx_Wwg
zYO1EEyRSYj1OK=IOT+`=0N??RHfDAf_Kfzn%obj5E+)(l7H%fyCT=EDV$cBH|JQuU
z{+nBn@sW_Rv9NJ4vT!o8a%r$~@Ue68v9U3+b8xV8v(mG$@UgIvF_4e}?M>V)om?FL
z$8X}|z{SBRVdCOx<4ESOA`b&Qn_r(T0{{U2*HuzBz;Gdm0rW}r_$h(WP!LjL)F}rh
z#7Qb}Nr5#GWB{fV=@|0=sC55FMNAO^hlFGOpO}x%45Ji9a54x&080ua6bTr^|FIw=
z{ihHU%l}yV%67E=$AXIn;Q#+S<o}Mw1hli4665~ACAiJ~P*~uQ(BJ?-b3ZsPSTiIX
zF9a0KfBG4vh$6PY%TqmFlKwX(g-H>WCx8$DaHgOpa;6}~!TzTi>;K6>2hHGZIc`lp
z>=-rP%gY2}(nMp>LeS33|5c$VhjEz4meiTYaA^J|6IOC)2KoLvd+p+B<(*{l0dNSE
zVe`CY=jUgis!`ZV_n+))nW}Dc{I|RkQFIf1t==;y4<n>(al5O}I{MD)uqeKJqBzCt
zJJ-hl@bu#5X(f1>-BJ;K3CiDT*!s=%qr&=qr;_2_A=}?yAx98YaaG^&Pu}39pdg-i
zkmJ2web?hXU)$UH8>q+K4JoS&_<-Hho9+5NLBwMec_$g3e0FnKXkqlZNKW=C9B-hi
zU7Pqr^u;Cvyb_@Tc1HM_CNjTt?5T|=rfz%-JMj%c6kTm!y!TGT2vn%1VepNLXs^oY
z=Jigz8>EOerk=tqbgBu}_QAhi^My(D`S%HzHE>d>&&<<3B;9?pyE~_BH717+g*j9t
zhIh|bB(w1aPT$3v`E{TVa!|>1Z4oYG>w4(5MW8ft`9pbeg6PjR{oKA&B3XSCHLfnx
z#}M&`QB2KNo57L6An3UH?QbfPCwDN*E2*uKF#{7NZPihw9c9>wJnizzYxjkh)6ak^
zcuBgBR1N=26#%8ucR589IN4e-Ha{1t0WKt?E@tmyy1)19(OHHk4r7h)_)0-?;FzOw
zo>#8+t7IB`52bI1-N3JoocjnJ+}pHQ#A0U?L5;xXEd$aSFc1y68$T_<ABCVWYxTWG
z^ZU~cU%Ep<%<lY=Nwd1b+zf}hR3WiUgSeL0vz4<SH5+8_w?&ZEo+xBQZ~nWY`R)|V
zz0g2haLwEMaWf_6Eso^s?cudeW>y!07|xS{lb_gB<c#T7vQy(uqL3mr39F+R_t1fR
zrMTAD`tmMo`=C&qf7B^z&{n0}u_cJJavL8;_E)9F#RKEeF82AZLGb8`N)qDA-WVcg
zne>>4UOb8C;ldWCY5ay2RK#5f{@_o`6|sT>Npm$|4+2AZf$yb+l|YX!W`Fv+aC?sR
zMev*hp2p@cS3O_aKNcbw(BAiE7qk!?Icvgwz8UmAb0Bz%^CyYF1|5jcY#rNRz}CR6
zjXO|IkIx>M!a1u^u%HmezC1uBmY$|eAkT+&Q%kR~H3$A-r4x1U47kXdSUNE3QBpt3
z<lH{4&B}^P<ZSh2P5xj}lQ!MQ8-IAm)SI_hMgN-`O=4|rvOY60y>XFf!WjI0J2zFl
zQXA-P26A2~5*m{F0qr-*!<yPUCd~j*V7d7iorgIN8!`>CcQ3*uKLho0O&Z4Y6Rf9s
z+q^-4JSFPd$3A-Ku)t>FlsI6@<v<t4E2P#@S#kqYv+Ip}6^b~!%rWxaK1b)Yh<Ce>
zCra2%x|vVrm&R5UL|uEG2lkH{v#Bt<Edf+q8PITRR|n(OwcVBRhY1_!zoFleF!U9(
zKU1_`JH=moQ9G~{M<H>OLV_8MZ)zloGW{0PC7~ZE_IJW+MybBSt&^nqbLEiz#C`t0
zi31gQqW}1(+{ScY>W+mc+m0;R)<)Qpt_+(+XO1fl;BmxylJhwKrW;SP6~9k&x{|7e
zXaQ~GO3q187-h|*-o7R}{}Rx>EH=kH*PLClox_|%<K96-i9zi!MPrLt*}QV)EA$;d
zkSm+KUDlXh-duNIF#q^yH7u(4R?P7hr_2*zR<T||{!W0@s(lV^fLU@cE_9Xrd&lsm
zH;RBRH_gRVk-Q?px6E6Q&)}uK$|_|1-UeEj_+Ek}`M#l%mr@;%dWgEu%O;6}TakO}
zO;AJ{qTax%64D;dd86b%lI985eK{&0BXadH^1Of4Rq4<5-x0)cK*=n{tTq;`M%iaw
zVet*taOO#W2m3>ms$4}95u&gZo&O7pPxO?YFV99#Yp*i>v+<m$zn-xQ2{+bl-w)K<
zD<k7EjUSE=7Cl|%XZmX4pOb!)QW8YD-P%MKayDHznRMoWoGG?~HHosGyho~fgdr-R
zmY2jq9CBU7IrjJJcSV2Nh+!&I?S1IXffRq5Ltl5w@qCNX?!li7PyV?-)8UH*YT!Zr
z_tt(aW+BZHx1=}25FFEuj$4Oq91ozxyliNaG_+vx^8`h*3@ZoLQojxX=~L3Ld~`aO
z5*E>6BB<w)!^%mZd<i8L?ljNWv9xiGWpEL%rmjO07CTiNG+c#>gkZ@C)>s=|SPP;n
z>?-2ujvGqWE@@igI8-}^QW8lOF{z4-#%)*tV+FV{V-ZaV({bT4_lT?$4>l-=Cd=ld
zcuxb8t;(A?M<w=&M-OzybgbS*pbGo7FQYwq7-aTKd>w-f2FaF_NUBA-5Ti%5jIBA7
zWzoR3gJ^9QJp0Jdxr~NH%MU9??icWMGf?yqYzJmMl(^BiO)&E|XUKe>yTFE`-i;T}
zk!oszh4rMu-p4IBN{mCHEf7?tw=hl(MTC(+xGmUk5NbwPtNw^mqR6K%2@a0c5X4X!
z<!x>|UH`8BLEx<3VQBrW^6c3^;a?`)Y-gedcSR79_sI^vdE-anL%wKV84`6BVxzNw
zfFiZzuV(%%`*#sG4_mYVBS|PqI@6ng@-JoX58CV(UX@40z9_1N4sK9W*O0Z~?Kt^K
zDqOS`-nI2tIyAmSX?!5MelBnmmV*Y}6si)N^%yo~lNqa5rp$`O^x274fZ`}6a@v6j
z@zT9-%n*An#PoRaf;53^m`;-Z#?;KK$eJUHck`UL1}Y*8N=d(F?LalXt#IX0@_OtE
zOXCl8iyNBJ7J3=j*a9f(WaA8<u(WnUC8~ZT@R`>ZQV0h|0C|0F3OC-0EP$|^%i`hr
zvYS)lLMFDc=awFNKaypQe~OIWHVL>$#x;^L>K04|zn5F|F@EE8vMX)^=?o8c9Em0G
zCjx2?0r%=+PYU_z)CCv3LWlgh!WT8j6OTr`&Bc`IL807EB@eRWM1kUCDvHuA%BGKW
zu=~T2EQ^$tABUiam^ldDOfBQ*uxR5WaJsUawM?)a@=dGyLw#<2%xA+~pmXDNJR0x3
zC--)OB|sQ=nd`5_h+tPA>&a<?SE_!rN$U1IdA%}9W({x<4sy+un97m~?iT>PQ9t!x
z3acU*bSM%g(H1Bxg6xO15BgJF#)M_e*O@Y;^`>FFCe@XKs_XDnWwv-&kKJBH5<)AZ
z@uf&K1pkf9eg?%J4@o7JVO+A!IZC1+*VJh+27bL^p6L;=ZQ|jKpFkp3>N)IoKCsG)
zQ_aI#VwvY^?Lm6k%YLnN2Kz2-Edz%opcd>G=xkiJBm%@j(iBrpp#zM(o|u#cZEz#9
z1*cTPuG(AEVM6KemA@3bG;$+xaEw|9hw3UGV|Rfi`U|&Ha?wmEphL9?xbk3nVHQfd
z%&n|NIT6E7yp7W9dzV<bm7V85Sp6xptEt!g(uv#SXRXTvXL((cH8y~pLo#Blr!LV3
z2Oky?<O3z<lD9zF`(PDReHBV*D-2f7XRD10H9$co<_1HalJ>li62j*rzH{HV_lL0b
z;p+1R754g%@Sos-(+21xVJ*9Yg!YGjkNj&2W<q@S8t``zd2R-6UwnmvE}@vu9L(HY
z89jz>C;4uU&E6d$yunVw4|0I!j>5Wr3vtrSAt0PO>Mx$0`0r@wJvy@wa3io0@2ca`
zM10qPPd8^od1?Q{U%9A*`~F7SM!W80*Xt0U5%1Pc0yy)7CHEw!FUcy|7Vgav?<EAr
zOWvgZoYH$Af6N_vl+3y#!g8|5k?roV-z3*gU9Lu$E99+zCZEe|=2K9?AthiVHUHwS
zC<CDp6PG+OX%;(u+B`8tpq~(wv2I#eG_@?sPnt)WP+$_)n#ju^Lw+=vhcKJImkjPe
zE(ch-cHD1R`UJE#o@!GG4h>=f71%gJTn4dXt=Tm<l6$TDxqscjDB8M(37sHPpvpd4
zb95*NZdFt+`8N{<9*Ph(tvmr8&`}B#6hTNs6gA{!g(*{mqP?CUR(|pal&qrjH0tki
zdsM$z8{t?9$UggJU#Qxyd!D{bcuGIMa*G372GX4=&=zQZ4>SJucJ4P?>^1cQCvT`W
zg1Iu1|C8|)M-384mlY{h&3>h7>XN45%$7GzunA$Vc^_y#sawa92XPw7!2UH4dE!rJ
zjnNz4Q>`7O-9p$ph_G@Z`m?>_&e7=1k;|Cqcbg<|l57?jn2C;`UiS~-f9^?$ogh6X
zA^`Bs@;|pCr4KI%l%@;kMKF@q=2+>D!(!~$ej-k;mu~NfV19%Eabjh5R2+x;$GVI%
zPB42xA4`mtHG@8P0Or-k(X1cZwEzw-bo)Ko9!+e#vCHU>h3sJX4n+<#Mb~%r?Zrhl
zF0Y~gi}k;M)rLJj&zCE{E1N%U^Vlq>GauJ#iYc$UxD7h~fr4-4SL?fg&y%;$^67QD
zNjT^YA_=moI%7u}Q3VY<b}yqT;sfS9#uRe3(67ATCD+zYF-bJdW$(nu<%=&ORXh5a
zvZegYsuc*eleLyc)o;dpXbHbCM}|DyMqZ3L(2uF@a?h-dOcY;RQJ%McIqZ<CB-)ON
z4c%BO3lK?mf#jT?EM0fWT)in$Zxu>Y7*Bop)1#?rdDd4PDXMDHglyvC;;z_O*>{|B
z#3%Om3kkVkbWqrF5OQSs^JMwVY4Q~|D+jz72%?^|9u$07Qj}WpqHL=N(<~`$Yi5Kd
z<Q;IPxKgroV!IkpDLbKz$Z1_^(Bi~w?Yr_6=v}6;Kx>A2ZJ7tgeAX{iM-b7CX8uW;
z4it!2Vg;WH48idaxyH{WBeBC`3$crAkv~lT`>*y<=PP8XX3|@PKj6lO*{}AjHu?QY
ze=55<ZmgUN?hLXE&n@E>RFf*2%|1lGv%Q=iu>r@ttbOj~x(OLD2a=p|C*cfR{lZPi
z68}n40rl~k=T!dlqQbLua6M|vzIs@3aG=1eQ50I3B+gSBuwFzCkMQQYFs9I2R`~1E
z^}8z{zBP7O=c+~E;wEJ12EyZapTGQ~lDWP0kK5Vk8!a`689kPhqnejNTHhQJerUfj
zRsm(!-JG1#B%a>Pnds|i`P>U+T4Iin17N5m45(O1PX@6}5&w!e)|q7j*4`tymDNcr
zfB4BX6Z>z6)OAp)!wE>Qtw!0>p65oTkC2^|d6B=SoB2w(J7v+CoOM<=pW1IK{V`+#
z<27!Ga^}Em6w!35gzasf<8iwQdlmve-<t6yr+=gT<`k{NQ$~^Pg3-x?{CIomS(n^7
z8KhY*4NmUd#JTcHv#2**CuQ2rN$W(mqr`qvPYXGwUIXRLn93LuGLSEWF=brN$79;&
z@j*-)tT^xC^<u>Va<Pb&65o|zbU7V4L4M`lRHWOSeIAMLz>cEUE61|2GGeuk@cIP$
zc+GpXs<nCK7an2Uw5X77$|$tX-V5d)fWk7a!?jIDkZ^z;j2#Ni>6Zwa)^n>gf6G3)
zGz)Wglk(!fS+SNFbm*kxFgCLZZ98taQ*L*SX$X}~qssNzqch2p;6@jdo`BND>~iF%
zyVk9kHWMl8d~O&E2tAfMR@fJ5@%;YrLV9imHm#@9!fRE8W&8zh?H@AFChd6^AVr)V
z0}{73&dy9d-&N5RDhGom;du5pj?Xi6Sd6^B;uxyRj`!@^jcg|iWq1aoaB*W6yh#bo
zd@!qQv#fui!-6Y(3Uev!s`#f_)r8g~o@z0i^!0J{vr9`O*8KJRhy#JMO7LYz5S>ha
z&7SOA5sj4XOWT6eoW+y*@mdO;Kr@26_OwNN;8!)K^1xWWd|iPu^V)+rmrWW~5qYNE
zo&=KpIpWyii&X(5BO!LP0kE!-o0yQzsb#Wmp&(29H+Faedi#hX9?!wuXgkLXxpVUg
z1F6I5;JA~l`RS3&ZY3*B0{=*z4HLa)(V^?}ep+)=to(5S<jVpBw}d|?AnYiML<%N`
zOv8m=s3-_9L2n+_jhPvc<g{Dck;~hLNeWeOp7H4dwn?=MKWhglxRBWtQ`CJv;mfAW
zlE4)`I66Dyk<w?vV0a?uT32J~c3B-`Hd16iTqn2rIN~s79i-7qJanirlA^zw$8C9a
zIhg110=jgnP2q=ruZ_jYfX;pf6+=Gd4mv(kLLm%n$lO{Ff-_m?C1C35?k8^a%%jJV
z&pmR9Ru9fn>1aKXoDr{7to&@6ePMD{ig3~(Z`7YMe3ZU+Wc^4U0771nC0vb93OP#|
znlKVb0=~_+;_Ld~SG|lt?P8B=ztdKg8XWM|tan#4L>JJe5!W3^K|JR8$&bdmn!yf9
zI-8>^w)Jh2ArKHDm2Edk6vRj#_P%-?i%VRQ$LvJ>weADKZ5Y90-2=wfdJS_*?;|AW
zu+Qzz{o=itaIL%+<NLrva65wK{a{{U_U?Ip;!n|0#(<2AjeW`dN-6n=?M0u~kHjE7
zF4sK9ZUOfeIlr`W(2Q{X;6%)sPw$!|9eazVde4d$?&gD9eIwOx!TsMX_Whd0Ss}Px
zw<D|T!L1x<cC}~%oM>0w<lW48cW`7`E$z}ORBq<wOT2y6E8dnAOb;HSZLHPB?S!%A
zWJ+FIB}qC?oF^E|e@#43W)@}a@5|WbWVUrs(R}eElASR~K=JeTxks11JKBkeq-7=3
zKUOMo#EnIWyS$|lB8C;Avgn4lER6(epP%Q7y<z_TTxDzDLj+xn9i53;zO)d+mURx!
zase?Aq0B6CFt;tQ+_cqhD`ukhMT&_*z3Q)OHznbno$a_W)jX=#-I4`LwK(`A=NXuc
zSw8rh54q|EAZhp!7!qF3*_H}gDQM{87l0{p8Nkl{C!yqt1=29or3`DBUi-#SCcWOL
z3nRtQjapJZ4gb*sT8ty(mMKK_<nE~+?mCU9NI!2;66z!xbHkvP9{h56oGZA#xRoX?
z6S;LQ{`0h8G;ecL_kP|X0>Zb+ZgHuXHxYsCYmVO<2t48pvfgHRNtbYg^rpMN#3dZ3
zC53KFgB!6XiMxj(L^dpj3T^C|m})qa0UFMNf$v%>@|>lmsmD)r&exx|Pzfi>u-@3M
zN)yCfF$iB1KgKivWaBO>YR7%*9J>fk?(>4JIEmMMAYl-CjD(bO#6*3oAx*oRTIW?_
zcd_jc%8nNK!JK^Fra@X!k<7om8EftH2YI84*Y^O!8%#}LXjV!(NQ$OkgWTxAu^9_9
z$C4u4j2?|EZ?-$ct@6fD;cP-8MyskPE>Ceb5*9^qySB1=gn>(x6MoL3KPLL$&TY%3
z#~+;6eNEzV!Qa34sq@w!8-xEsg?ogb-Pn#Lg1Si*8+%gFQxdR`^R}5QOeI)WtzcZJ
z9oXB68GgS<S8YsU?>7Pq(RCra60xXT@RaUCXlvkK)>UV1lXbyngu|}Fmq|(udiXDx
z%aN7%FlnmPW3Cbivr#z3b#))5lrEBK^$fmw!*W55pZ2>vnU+%QNjWudT^!BybX~Tl
zppEXW+L-v$n0KN`#xRIblpm0j6!BnZET=F8VEanqQ0?Gyg`Uyo&5<h5+RLnWZHR*P
z_kRRWWb^SYnuW<(Yj(iy+tim;$tKezJ!K6goSo985Gd(9Vs*}9iTI!1_J<J_Id`oJ
zrV?=NS=u!XZO7osgi!`1Jao>!Kvavwpl!#G(_b!n`J<ZB2tofypMM@-j4i!)w~F??
z&a1O)YKAH}=1ukjfEmjZ3(NRXnK$xFYFPsVUddC1d7EkEWve&i7eH<TTLpq5$5d-g
zg0_K`)#+~QVY!OBvd8@49}sqi>J8aC3Hn#2-g)e#%7d*?^;YpdT#sJ{tB?L=pn;z^
zaaB!<6vnxn`sGk1xTkCu+uiK~Qinc_cAHo+C?kL980v$q8AqXwTMcw|cXPUW7Mj{V
z2WMntYIA{qElvHRN(`fMqL?wZ@tc9A&@oJ9D8sudK(NX)#coaw?UtU#bId#=%z#!i
z3P(DXl^y>oT*w{!T0%(`a=9Z6P+!P`!yNj!cS|46<O;gZsGd~X))tN%WTRr!njn8J
zKWRyk_&};~DMIy3X7D4wi_?mLAT#g-Lwt$3Do3+CmH1@ylj}~7%u{v2@kQ!Lw0C^t
z#$X0(JGedP(`QrW`hh7O;jX>e;iZZm>e+P5Xn5%D<I|$;`$Y03{QSifq)oN!yU5?&
z&0&e+j@?5GY)<2E{kYS&n^zKFKdcK@5OH`z22~8$-!jR_b;E8P<P7Epzs2{yV(6Pk
zTwAwTb5qS98Q#d`1<9mG=VXa4A1J*|KPui4b|!Vq@wq<D_%HVZy7r8K+UfY?2=x9Q
zx_9_MX%TDh{%my;61yjmR`>kOl{EzsgAeaDd?+aqoma?!@N9uJ!^sri_`2{VQb9`M
zMfi*5@$R}wVnj^uo!Tzpu`d5g($%q$XuY&l?GJC1qe5|mi>plE$liqbO>ED3n!{y;
zHT}J9@h5+Uo9fSiTqB`6%FS82YT@Z|ezZ4SVn}7{h2<GCS49mF^2FA|MDW^}_y9#;
zxl-#NY=(iL!{<063Ub+A^@!$|mrc4KZo%P?o}{q)%9ypMzW5O2P|nuU>}Q1k`mLa)
z;Kdi>Ez`gWTbP*WGR2v1iw7Tc)OE9_uE#*fY8%{qzZ9aY-7SXtWoFrMh)fGs|9On(
zxUpbFQ@_`<sCb%zRP$*&(ADD8MsMtwcO9kql_ZO;T1kexwBjxZvoFDCWeDgx*eA9O
zZ7tr5NBZ=7%4c+*hITPc4IyV%)jFlEzjCpoc4)4zj4AT9xAGFQgA_PWIhw5^H$v)X
zsWcz<-lO3NobQwkOa#{xM9Ck5si9qYuvnQ-W3r}94b^TzbEc**jWXUFd`cPMo)MYa
z0<0c~I>X)B%w^(H-ZMEGbW5%bE{2}UBiDPt%5HAUS16zCy!W01Sw!hJ;+SFXov&J1
zh0R~Ehj6>p&Bhkr?KQS7{Mch$-5#okH*SaWV60TfEqi#f?H(vn>w0lS-372mD<av&
z?MZJgh4jXt^0g>d(^!S>;(!BU|E`(>TvakT$t)Ud%px_7C|gx>M8DN&KEKr%XjUx(
z>UeDGNaud)=#+c!CLCs^ALzpptg;GN>i6bVnN$j_1{~42@N*c=!}bdl;P9L=yry;5
zKOQBjFL%Q8A4kBQtF}HkoPYC(=hDuiSwS#j+#C{soa@f}J=Gb|V*ZOAte`b&FrZJ3
z!21as#U9|+QqUzu)vHvb54NhM%GlE1eZwQAE%n0F%268V>=xwQD`$0e<q|Px6e%oT
zVEQb<8!bV)_SF_v$<a<y3LXzu24W@~n(F9P8kd$3UtTBR{QDi;Ye*@XR$|23-J1Lo
zQk#AbI>kSYl>^pmFT(8GQarQi>&V&xkYrKVwsmFU`gLWM&#zqrM2Xols)}}Snw8WH
zFpa%26(MUfS`rWBHRXz-Wpmuxn&p;W>*xx$$*1%572{UaV$+^}3?Z#xKno}>8o9mH
zBi7fI@q;B~a5U<gm9q)xD>ARhI0u(B<0@5xeqr@HC;!G(Lka^Zr@G0vDo0^weFSiR
zp#>`?KUFeSR6zrqL}$V=sO7U~8a4|(FH3O}G;1H=e#@M3E%V?T$F}qKMjKJ8<&yo1
zs5j~;6CPXtofzA9pvc$0HrF$<?GiFL?+P9}0*ny#1yIY`asr6C&N2IClhZrzVfW2J
zI?`4lh(;d!80S^ZZ3rM(9?iv-@Uo?6zOIOa9OPW8i$H`v1Zz1TWvjilv9Ivi0Qrnh
z*KicPC%An#S<KdJw^k19j~3NMbL@Ob9*Et-<{Lz69v~HgmK8TULuIKln3Zfa%d})5
zKJINz=}urXrC?q29NTB<QoHOTxBCld3z46Ii{zpYm#LZ(+&*1Hy}Ys2JsS@!y!q;U
zn~(Oi-wMaEsyLv(Guh+vniH|Ux_Bp$G5L03;{%7NrdS*7Y~b7d#Ufm|RvlrJ?t(C!
zcx8dOzfJZp+)x4`4>7B<mGJUMz;HuvJ{34q;?pZ%L0|f^oj%kM>vPpvWwI8CU7$4|
z3nHJ2_Hs}%uw4GPmQ}sy0bV~lMNdDlBHsfn9}W{YoN&TQw#xCIc44{D*&)DD$Q5O6
z$#s8rl*pNs+kobR4qFIio#{d=ucuG6dB_f_7wKOQw8PM6|J+xMQ4sNa)?}kDvsq0-
z^-|}CnZH=Cb1-@+6DoFeOeiX-8Zv6dG_FIZ2J>o|@=c$aD>BX$eRj5W5G`k<v`Af_
z>Nk+84m0Bj)V>~!&Q9Y4G_AuVZ7><xDAIlcdya`5)O+yHiLAPVAfq}oMcqRqqg9jh
z&^4bo?ZQ!GtV10V(T@HRCKPeUKdgEAaW<ml+}Z(cW>_uNt)$QF0Y?k!C2>UM2H%4`
zwRDc@z^w6=YfqaT?rP}^B?=dv4H6xF?vn@6m(IzR@@OZI09|<vqC+|%sPTs$>8&Y|
zPbHK84FkO?Yx_Ifbryf4gJozgxDaTcnSj{+dcsFZrx}7?b0NLv3(LVW<aCGbPj!Do
z`MPD5_*1_#d=t4UK4XCTs~9pgeAxEKke<4rX|-fFp=g9qK^cdB!|u?+=zSTcl%YKP
z*lPNL2Vc`Ehyi2!VxelnkEuqRIRc9vXxVUP`V<!*Ou67R4TPU!Ld%^67lPlwrn!lD
ze3aX3G+n3;xy1)2eR!E<s@PZAyb9Npr+kMae>H5DwVz@`fz=?&XO<Q@YcedyxDuS9
zyVQB*enW-})o_ba4HOSXu^&Trd81bpUs)!zF!92YW@D_~zd~PvdmtAytj=<a;<_?_
zF_eA)){aO#4+sPCDMsA627nEzWWe&c<XeG}Gl&THTTJ0sI#(F^iyYKuxMq{LCJ4r3
zA-<13!~%v=>nspP&OtajhsPmgT;BA3G8+`LB-Z($P`{GwmAeP|5K3-uhU})0=oX)^
zo5vpHOf-z!OJUupHtsEBu@#^T^$WMxNa`Eei&ravY_2_acdYDsv;&cT%7$+rE7J6>
zoDC`js1RPj7w1dyC1v$p)eOq1aClvn4J=Qi=L#LkEdeQ-RL@FL+Y~t(zY(9fv8^&A
z+I8HsH?)<A^L-EuCBsAcI<K?W0F4Vu_Aal-eD-Klz(DGC1f@oc`r+*K7_l;FDXoz!
z#mxs3o3Y{U9ugVd2?s&&KKMX-G>mYgY7tD*5w79E2_cxT>e;Wj18sT@idZt{{6jmu
zEbv<tOpqIy={W#d8ao|qRK@!I+9aG<d%hiTB%_Zjq$XPrKqfm^SQvCZ(&!(RTj3$K
zIni*a%ec5cQRkcxM+E7~j*+KQQ|??3_Jt_Q`si8}<Xz^>k)Fy`S3v9I(Iyr6rgi?N
zTVW=@a1ef_G&to)1R2OifVCeTTX`9!1&gmQfe_32@8M;a%4}ffaF;k<YHT)#wJ)wO
zZ8k;xmp)k|I~!Nbi@I^spZ>c=CfDzZqUwGF+KaJ(TZp_ITktkbdLHm+9Hy!=%D{r5
z-{;qoh|JYx^Z?I(Vugb3D>uiCf-O5+kG{3J9Vc54(D0n#=(cC|h4^?k{8)CuN7(Ea
zDD0qTrG@zZF_Dk3p)_2pXmgA9Vh<<W_FSsJn(`|F!56H40C&Bjk(#BuC|n9qpT?nV
zwT0v8%ocG);m*>%6|`oC!fkuSThct_vnLq0x#}ByzzbbVNI$}KjT3GF9T%9EJZZ@T
zq~FH|;!Mx4dm}P)YM8i%l(<@2zfWs{!dH|w+{pjf=k??tASvzTB)Uz{m)*C`2K*#G
zFK72F|5uR7-<&=R<H;U>nI+`7tpF@vU9{MD4C-aiD?@Zja|@a^9B!yh7lQaCLlf*P
zM0D+|D-{muQe1QtZucxpZ|@lcR+C$Dn?5Qyn3`GI*k3R>Ov@rhT%PM{i1-3QHLMaR
z?)dwL+bHKXnWZNXc5WUw3^xkv75{DOabf?;P%stYR+n%f=uYESvL2C^buzWGyU<zB
z{;5jnADrJV|2Oua3%Of-v9lR$&lztoN>NzPjQh&|%%1UkX8>r{V|o`Xe|0SsPKoT>
zUb1FoM}IGv_th&jPmQ=&QBDkk*o8>20sma{Y308S%V5)=7x5&&>qRNmNwFMhcW<Kw
z;CSqRbqt!VuZf5UqR5ctA;GkaT(52DL1Z8I@O3F$6_~t9ge<;`9!DV-*cOr-6A#Wr
z%mzgKr_?s{g)oG!i3x|Dy+O0(#04LJ@y`@L!{$D&{3DHcBZ(ESoOKZ6OJt8}K?fn`
zI$_1`Erw@Q*N9+MQ<wHO^({6^B+Q7oX`h#l02vWxLZ$Al^FyeY8Ud<uRLG&tL#)qB
z4NvIY(A&3uTYxxKo>xLyT1wRLrHOvv>;(wrPcP{R`8^c$$H=3a0KBp>Fmk}`9zysL
z0#u-U`bO2{#gzG7)DCJ;NFa{{TNp&s1q%@f!tkhLpK3kyc9!TSF~new${3+P)O^VH
zGUz7Vb#wDW2qJ&r+$LvEJ(TT0LfFt1(-s>CgKY2o;=LuF-cc7z2=bL%jA9h3YlPZv
zLCc1^C5bl*FG!pBqS#WF|4Tl!mod*|K0ep~aSE2NCD$Q{H3urf$v#7GiRny`yLjD4
zmfgHNLuS?1lS&~yWl1$#SCz5@D-ge!htFhC4-sI#PzWF2ND!I-v0cTAn7)YIj}7!{
z0qZr8+7my*rmQu7dt{>+SL+TXyd5+G>t(P}u}VSwKJVA2D0Sl+x*uZ84$<_+lm#2g
zh4>6T$aJZK@%xDszJQPa_@CjLzX`0D|D9@L1Z4pC291?@u!8A^zn_y#G`|sX`mqF^
z$KMYGK@o!aAxMt-6-fJEUS%kCGwXe${4cQr<<)VFiVFZliu{K#lZSx(56G1w8i@|U
z0YFYMmM2M>jl+bL5`zacb8*GNfPe%em}wrZYci`AP_g|lI!zCC-J5BC2X;`sjJgi!
zKas_0CoNm>Y{v5Qc6hbg3Ij_j<g^vQ4uE2ml^RuO4JrEsniPq0m~a%RBw*l>4WtC5
zsMAPrsIbO=d@Wlpi6$iMf9`re2K0S@yngTW7{SRrR#jEC-CU00!!%*PfRL}pZ;8(D
zb~*+58=Temw2JANzzi%j<Q4BJ2dyM<F{NWp12zbv5Trz^UU!!gO0A1!2Vmt8HZsuD
zYcXovO$%%{Lb?b!AhkHvf*a}5Z`3xRs~K%)iCXO0X%7!0i*8(Y)jtxK2fEZ;+SroQ
zZpbkAG@Dk6nCwFQ|1=c+1i6WHph<~cUz>!&VC>Lk>o6hJqQ^$IWF~1!k8nw9Pd7$!
zsWNXCRcVu<$5y!EnaK&4nPw;&bRVk?M9PvTU?7Uhv4>NRxD{itr<jJMD21z~)EF<z
z)lSp=TxyC>s+tc95f4_SzA;AJ95FO#)%>Z>tYxe@x{^z#E9u-v20Ar2NOk2MX_8ne
zV`6ld_z_cFl&5{Z8Kv8(gBHuXh$c0wJ|5|@FnyxV6x)6xchVMWyev<Tt5<cvWT&O*
zqs*ZhZOBAZW>4?zw1A%23SGIA(Hc#^Hu*@26Sv6LS1PD#iYPBBUpi%9N+8CPT{6@A
z<9AW~NkmK;C#S7KBd9s0z8Me0BzM(@uG(&+NSzZ)q2VXrUbzIe2vl0M>0~kjrDCI6
zXOR}u`9quPN{hYK!zLPS7mg=;Ip#AfYA*dRT*%jes>0Ia=)dEY7<y%4=UA#1GfZ03
zMVM<CJMtj13o4eQ&w{5v`ObYJ_pXb0pk7k`rhHdAN4ja}pCEei)H2hPlVr5JYo%y<
z<2Re+V|>R-Gbmm=OYI>b^TaGY>y2bZ<$RG@rL(nx4nmJL)*)sFYJu*!8#S7#bVP?P
zi_%$kWZIpf5_bDxwb|uQmQ&gF0tyzxtji)xB?-9+HPYh1qgdm!RGDCK?ze6}TZB>E
z#X#j|gKktjInWx_uzn)|sWvh?cPfVRC+7j@vWH&_VzpunW3%EaLzu@{OI3(qm8LGg
zqVB4+#6~dCjg{H1I3@0ivCY8EzteX#OKq;wcF<p@@^7RI34KEBa96UUBDG|gP}(1t
zGWu3{tUon#OiiBI<DWJ&2)?v^@-bB>_Er2bzrpc`z(JLJ+`$1Z7E2m#{8u;3P#c@l
z+BxXiwm4%IjHH%qX-KjxSV}o4^a7oj1!9X1LIbG6oWEs%AfwEpe$2OU8FJLtS&hGu
zP$T++@I86FE^2cb5@$SUc|#a{qf3s}WEZ3l>tG_y8<($*JZUuNb;%q)6?AE<^$XJ)
zl~v_hqChayP00WlD_58@=Ka#e#SM&DZ3btX5kyjN{E8*(U8tdV<&~RXq<6~-GS!wi
z7X&8Xuh*A}zpZfic@4{s6n+w`uPbK6>x~j-6kt7@lfjVUc*DA+r*0|<s%9qp_61dI
zpT$DS35TTkCt>Wk5Xt#`)Zc9T9ohqy62g{Snn1u?mEfdPNBgD!UQK0|-yKuGwDoot
z!N1#t{2ixC1=+o-rmNYAHwyge4+>B`?cIkMw1BGK>Uy*@R))xSBLq9{+cAe!(Igd`
zOKq61#?u{*u|vGj9`+e;c|+lS*{;J1!bkogGrVe}R^rOI=MX#jZf)9OlM5#7rBGL^
zJs{7!u<Y`HRhce7Jj!O+8!K@gT;@6Tb`>LkL}U*nq<3^#%{4jjVt8*o?RSHI!w4v|
zo|f^mfW@`nl~>%yZXwJ3Lt?%UtrlCB*qJ=%H8>o;QnvTZOrbkNVMrOzuBBi1WIbbQ
z3iN_nTAYgEx5{Vx*wViX=S$Q)ZPukySwZXQ=Qeq($6~+COUE>qn5I;mk8BhPl`^QV
z{Zx&TE^n2Fb~G*+M3KhLqDRKfTYG-$HD0?_+S64ohC1hKAnih#6+gy=|0|?RZS7#F
zLziwXLL}s6P_mqOKRFZ<yR-jed3NHBbIa0XY+pA%58;EwruWG<RWYqq-c=b}Hw}`q
z$XOn*W|&mTy1~R9$&8yu?o${|x~Nr{q(hmLI`g{=d~(}vT1abr0}kd9SJA{*95t(6
zKiX!hbEM_M=cT4_(p?`D%d0kQ0fZSf{cFb7ZScDeE9kgaDmId{x5{Y;Sui$bu%e@H
zyMmoX{b=gezbfE0e;hr|Vt)<|1%U)CR5;-aNvcFiF{FNANU~D&8!>*a{Nk-3;w+T%
zaul1fX7X$A-#gpC<8b54hS{?!)N-TZL&!fZhO#C_TOjjt6u+@%)dg3!DlVmoN2`Ef
zgh-Xhqk&SubN}VoLQos%NxwkXf|nPvTG(n$sGGo5h<GWg$#LkqAYl4byay_jhW*Pp
zK#^F!&lDUWeb31R%(A2HAj~5<R|<AA&mwM|RtA5sd<PA)F%I|%BWoQsSHYbhl@jG5
z+`mGWhr}MadO0EqH!+WQbi}%#Xdayfen3b}xU8w)w%H0`ke)jRJDGM439RW40p)FZ
zZUJF;+D9sZg}$)=MB}<DG(l-ldq_t60Yn3ys!OYYTfmYq+NYahK(ueV`w!%xgINF(
z)Q~USuGoPlm>+uo2kh<Z{wrWtxnZ{XgSMMD5CnFw3~4mbJRH&v{>eTdTyozA=tbX$
zNQB52R5(Bf`=siJf%{hqqYUti+>h{nk%ixb@2^68!VWe%+|U+$1Htu2z{Q92_BFDM
zt*YUm9Ab)sONJhqP;tUd)*_1iKz*Vaxccb`AzW8*(%bLq4bO~xcm?>EwjV+y0DI3m
z4lze$85XZ{`<Fxra{CDKJM+Zx*LHjGo78?j$s5hoSL)g)@06fOr*H5Z?tc6azvL64
zgU(CXAt<u`o&8alT+lwr)QUuh^FgFu#@VkpYxK?7w!&I#{LOfeLPIwpv5L7`9mErB
z#C>KhGla`OvcYH4e~^~}LXyCC7(ov~cNbsOJ{YBcQiYU&-mAfKxPNGK0a=5ic-ADr
z`J`{C_&4=5?Z2U4!Cx6DhC}SYgrx7q1wJssI-t)0|MHAUvq8jxiuR16(y)eD78;%s
zJI+)%Ldy3-@Ot7M<P!73Bmnq~Fd*7K0wgHKDnVr(eGnh47nUD;j;K@x{y?A~gyt8U
z!8hAn|3}T~{#p-%2>ze|%r9#HFHU>Ubb|BsAP#$vN4}oefjg<6Rc3!#w02{e_MfUf
z%g#Hs+bGlkB#^lw)te){5z2ulpch<zpc%!N1)yXYHj4>pi+I2btQUn*>)?v@1@tQ2
zQ|0eau^85u<r0i)174vAJiYGi0DGY~;`>~Y=mH<`^vge|6uSxr^#+ms5LR=oKn7DM
z2Zs4Q34_u@$RAwS6BxaO;aX(pMl5T5@q&4SZjeskpf@L;aGGs`0i1n)$YMUK;a<Kv
z0hXzo<1qu3F({t?W?!!Y>_4MxsydPTdS;M4^7k_sM6vsK5FbPX2@QtCXPssk$*_iS
z<lhh%mWo}cK&$gpUc~k^7iOsjEXs5L$cPiB!_FZLrbxbf2p$C5>)1o4z;DVo)_g&!
zgIgLR5Y!v#K$4G?r;^|pXG5;e+=^rF3EySs=Cx_(Nop5)^c~Dh0_<$#HqIPj;r%5|
zFWCMVWIGoyYiqSXR$$UKfde@82xf=pa18EbZ{A@!X@ECm6yZ$$YnN64XGk0DGj!k8
z+k>AU?O_`*B`Z+wb8j8HUuDbl<L4GVekc1Kh%UXWEQvp)cfe)suS9+MTM^DIcpdci
z&ag#E!XS!ihh|_g(zz;11ONss;o}WLF}6O6ifL3{=XUYGVM^!LrQlrq=N}z(*E_#>
zi~RDTc-S{B94&|;A@hlZoQ?qD2^Y<o&wgum@zwY7cMM{*drv?FfVlL{3$YIXhX`TU
zYt6Jz+1k;Cl?~azoAIOh%tbZzr(>Vf{t1J3SVDKuA-nB^Sifrjqb?w(wF3!=c%Q{6
z(AN?6Lr;7n1w0-=3wSq1aJ>?q)fL(pr1<bo=N*Xm&N8t#Ly`UZf9`<XyOYEO0syc@
z@c+s;ga8Qtm2U`xq{LwUf7LCz|Jf9$Vi~M}JENdav_za?Li@^B8{Yy;kCL6$ZXT_$
z%uC2PB%=P5mzKYMbG(|8nn9#o`EI(GJgk~~okkNE*dg`>7cZV_gj++64%4U0EpB@?
zu>~<@2Iz}g&o5c15&nEz2CEz`&Mz%9y1HDNH=hLtp!IcP#5e6F;dqRHv)ZG5F%Dgh
zo10v9fcMhLi(Bo{U=iE%ct@~EbBD0&Pao@;s%ek4h9}n-8P7NXTJVhdIs3<dSZY{|
zzt8;}6I)9`b+JYEj7Q}l-0xbszqoxt>cgBucrh<oc)l28Jj<E^+l$-dasy$imcDNv
z|8BQlK_C2EUfxV?bEERBI2J2(JB+AP;x!q+=?V~~7T?rXpiMy^O#NC?3Cq3KfWKRw
zfu*D9`~DxZfuBO^-bw7X!Mx!E?>`#&^!(GoMbxU6Qb$@N!@&z^+u~I^$@OE>TLSn<
z#b9B_nrLF$#yv)iIJnO;2gM>`N!}K~zm_ptK=-(|k;BG##j}&twtvj{b+aj&YA~D0
z@%qz~g9R3{hw)z`DbTVLi%2B^0t>6D(G9r47VJZM*M?xMsX({;Ht{2ZOVhhgW(Z%u
zzrRbTouDv)9do)V9cY#hUJM=%s$8>98@DpTEAE=iJWL%dH^wN1Y4JD_bd|K`?STfU
zAhb6qfwjxJM2uloD<V(K%)<wX=xxY9=_j92koe$-tu0)&N&t`iE?$6)_c}I}Y?rG!
zZ7WpGdHc0!gB!Ps@>D$k6Ht5KuFVn~_HvW}X25s1`qfSANw4>~y*bzl!4sf$A5k8!
zob|)lu=VGjZT5~-tZcXO{?y`BX1VV#kl(c5I>Or;4j9u1V>(Q}N-pXQ3JOVpCk8Qn
z2FyDokE;)}4g|8N?<q7qY_35l##+4H^OsU6bQfijasJ&0;VqOT8~iyLg}@St?`fm?
zpc<mr1ee|B&yC?z8mnU-4<qZp5wtBG`xExR2B(KwM1$*qw$Zm4ETSl8PJG=_pbR;6
zbKVkZ<|d?rjA1=nI68XQtY041Ul;~FkhKAI{FVJs_6ICLT#%x$ahzj<`-H+PsWuV-
z``Y31+tzXhbd)?N`E#b~@oZ(=>ZF@yz+QzNR9+sgnfbww%#3g<cglYh)NHc5_oL?S
zs<mrF2c;X)mrpn41_ibFTq8pj)L3=}wIeKTD{|V#RMv0L@JLNwuX7Q>68bkj>|0dl
z156iVzt<@cX6$Xbk))#Lb}Bw9fSp8)U@$3Xnsj?gLT|z`ly7Ux76h=!xG8ztj5_V)
zyLCBA!gB*zE9VBuM1(k=id?l7y4o-2;o{J(1{GW~Fb77_t}78vWGYI5?)tgsLoFrY
z_T0|nMu|7WJ|y*ijob0e-QV7E`3uXO(L9lDWXKKeL#8{)id21FT`(WI>qZn1{ks?a
zZW{SgTUM-w{d2nXWitI%&$cx0pm{3p*v<PQq@ft(yfDq(8JFBZa?)pJj!Z~;Z&h`Q
zCJgkaak?L+aD$kBg;JmdQBm9SW@bGzr9;|E?f=|8ewfK0A@{ax$`{FFyg*pqsCEL9
zWg;mQhSl;a@dq}LAmINzIw9sm@FLQU1suYtR7xtdZutJarDsgBrIuh%S`v<7T)`aa
zThJUSK;^<er;Zp3-q+FaoC?yPvJD;9eU32yff~yHEN#}aFN8=3(%J7BDI0q@@x?FG
z5?VM{&{Og(JsNgdw=j_Nw**J^4bDdmhMd|O-}k%9N5bvpmPFYxvFUOA15BT=+K5a5
z^*d>asRIn{o8l9`0&hm4$(@%b!Y&;D1J{zs_;E)r(I_O-xTEXgt4J`LJT!tIwW~IF
zqymi!b@D0kt^c$8_ak5K%|Db;$<$GU1G#Q>H#u;t-kJCbtg~_;WJL+y3S^UlY5RsF
z4p>5RgBsuekaNYh!L}xR4&k|95Er;7-b6w@1h%AyirC_pURk1|jrQch@POyy%`$#z
zO5?~+J<oP>$C>4RAJkApTm;eVcKGerwS@D{m|FI0QeX0daM=8$_U$>kPSfWt*hcg0
zcpDzTdzmOOERr>hY62&ccZ02B1L0x+%RL}h9Q+ZWeLs_(Xl06h0C0F3mG+V)$;E8|
z1=lR%12k@$&|A=#NTtY>#a3IqXMw_)K?7^TqO;FjM?vR(@)T14GMV3egh|*@#!2Id
zp_-3a;YW`^3m;$)GGwdXNU%R=+cpVbOQ}+R_ot%-0Mg;!Z3nsjr3C4Xqc|G5lM=Do
za<EiURo~~mHqg0bJRRQ=cy8A`xZT20Z<`6EOsp(o=(8lJ4#rg4U#ByFvJjfFWadm2
zSqNuHUR-rD;LKPgZR4e^_t0fPVY~jko}ICvc=7oFtp_15E0p*Qo8000+KLMAoJ6?l
z3ZZ>eIp2AyZYA#OW55)<E`%9|4<GQyDzMra1f~<oo37cCUu|w<eAqfh57D2NS?5a~
zxyDWYSk++8PF~<vblz?)g{g2QVF<&H-*IpAOEB{~NsUxuS85GKi3!K@!0IrHm`%N5
zZeULXH9vvdQ>xCr{OQ&K9BzmC)fCTyD520lGmUJPK1UpX8nxKjUbz*f->??U@cA3^
zW*`lr{>7PR?&@4W7neBPg$A`&nHYnUtvcK#CJu-qtK}*k7qB0okBjzFJ-RKIw7XuQ
zEc1*KbVLdecdNfE>FhxH_@tYB$V9TgX<(*<D6=KxIb=!DXRd7ZcI(>C@mjs*93Fft
z<L;W1y(+js`VL8DZEt+7P`5cf+IdRUbV*gQ$tCS!0<u0mNc+~ZQ0i;OjfH(s%1>+i
z7bvs5V2}c+oq}g2-#lj}>%Waop#I(H&rb7R_@3mZp5?k_x=M*tudpfkSd792b0ZL<
zeXZQ@{V!VCUxUVD4GRDeQ2jp`c(REjs5BGKjbJ2e%snoC!GuugSl%&#!OpxS$-=(D
z0+Mxfz09Dk(W0vnb@BhO;MICugFL9lxamXCTv2f5;vEAAZuXI1usJ&v2{UFB*-Ol%
zFp2(CV&DNei6M3y-)lQd>YD7Q>(7tYf4RT;daM5S)>r+VB&%&KEZqM@$^orM?*=*8
z`K}?_-|n<{d|d|ZVxHCMr7_&wB_bq;ZJPUqJ)2mNa8yNjAP<kt40qVP^-PdCP*FgA
z$HEOQkHtD#7!0#~aOo}Y-6~?))hb1$<`b+vxOV9AGNZA5AhNI2HF)VHI5un+FH&xP
zD){<np!~WQnU7U(#z(WHAOayseXK%7JXMH>3F<bhud_xvb!aExK42hFRhU<Dt<hE*
zWjEv?<)Fj$=&>GE&Rh3`r_wJ)tJc+S<uI&G=HUqy%;nU~xAAr)TZ*tshfw`H&&p4G
zYe3T&wPul{A;OO@ADwT}BwH#5Zypst^^(&jp*Q=D%fX1>BU{h4Vgt&9j(Lv>y>5#Q
z<o;QSDfUEF-O{S`Yj+{?7h=Mg7zcTDD}_|qnlVEOeHZy@#fqmK@v1uC7e;)<=-3m^
z-x9v0Ozxkxc{eTGw%KsSUCrngu_CK=A_AO?xKKEj<#yWT?EC!{yjagvb}d}xKk(EU
zc*@%O)q{oDQqUt4t=>R3y3{rS<pN(Zh*o)IEJZm1wuOoV=c{98e9>#Riu%q|QNcgN
zwcDVJkN#F^(B%iCtlDI2j5m!*qZ`-DkYS=EL^MDS#44s&nv}Yu_}@7=XnQ0`i4{-Y
z7XBJOv|OcQ@bA$g=iED0<g#uaah7<_+;jY`n6*{R5c-g^12+r$Y(`kGD}Um_!HFLI
zp)QUNk<<V((KE)5n`tmu%*icsmy0W8|3&yj#oNlwtND16l`P=Ot*Dn@UML9aQh!*F
zH1!m#mVdOF%P<pZ55BN*+xg?F1^5?&yJyzQEi&^a*}V1ArtZ7cYEj-Mqb=3I=ZF8P
z(Q+gCg{$kI|5pdpl%?w%U(I)=(3|Z(T+=>_R<@vv<GB%AGvt%f+=^Sozi!bSzg^<+
zML>)sJ(zyQ+iN@HF)EnH<js5{=oWt+t08}Jq3Y+Opd6?0W*k^#K!Uq}?9vsvC%bo>
z+9D6vVbF;1qC-BEcm{70q#-%<WK&&gn9Yi+0PzoowOa=iVFGP-U`%(74EJArk3DQ4
zL24f;lap?-Yu?NyX^SK~8#_Gu1N*V1y3pN`^(l3MmyIY&DrDR#k969?iYv@r%Ct-<
zxhXk6NS})<orTX+Qti|W6|Ae89S`?XK_RpFVf`Ygu1*O;elH0&+yh$LEyQVblkyM6
zz_3J<q#Y`VTiQyjXX3q1oyT=^=FVf?LATW=%V45vDQ~Bx@NaAUHFD~PyPKyXdsJ)<
zBFs!-t75B-d>-FDc$cwpxsZuUa>J`)pHu(Z?X}O#{-?h`!pFzf_#QT_V=lEpZdq-H
z)-ZO|0Qr8G7B|MvT93Y3$7CRPzvx)#Dj0=+*P18@XQ$4)Q|dsM@b(v3&0TaT&1r7q
z`n4kTw!;_}bPYi^BXyaEQ<Hj5ejc_JCpes-#XA>tuZrF@zbh3p?m(l226v5uS>dx#
z0U=eUDDleEvu}z)-Ln!RgUjPIwjF{)Zr$_vl;!_d##aEv(QM)37Tn!}+u{&NAh^4G
z@Zb<!7iaOn;vPaENN@<g=nuhz1PLBAL3Ys%EXyPJ-uH5Ey{<ad-QRrOT~l>xrf1IS
zFP&>^0dKhtZU|^i@T5A})e3YuE>Bul-T<->^6-5X*SJauKOH~$vdJ&po}I4aA?&y-
zVmVQ8G8TX@e9VUnZhYo2ojxAM4GRK0yGDhPxtat_7k*>H2nEV&@mKm))4j+~Nhf!@
zkz27&XR%-g9Oe(JoI)nAc>1hoR~e$4NY-@<d#pC_a!-qTpQT4dy<&YgI*63HWC9*1
z00aVcKK({0_@FNqKG(3<zw0FA8fjh#?k1CaaZMF+=9B&-tvj*hgl%WC-9I1=Gx#_g
zh^ITJMbqW~{3`imDB5@Jy=b7{xQqRB<?M@R#)ba<<0gsUc6oy(%(=<&>md0_yXjbN
z8U_7PD+QpYGMeU@Ip{9Ms#0(uOb$4jh~?9-SU{HWoLbko(*&ja?ljr{8HrNHoBDRS
zG~2-56Gy`FwAZa)+~`%09-nFN?xed`vjsHa>b76vx{WvvC99-$OiK(_qW0uSvneHr
zlT=1W?U0(=?UokCdnwR|Q{8-AD96karNc$l9!<|??2bKdhwr55+WX(GCxbu(_6heR
zM0MtT>Q{J>XdV7`nnaEuXJe$>S}mw(%%^TB!d}_W77%cK9!A&PQ@6Y3=8KpSD|CB!
z;yI{|8^PJvKee!+j5ZxVAD1>Enx?yr^N)L#wx=o0fDE8^l~ky8oukfF-jPj^>>!Dn
zy-|_=fL|)__pM%_)ie_9iv&^!sQg+52iAqIdEVS^F=%%Du$1zlvE*~^6C$v>bk;ka
z3<dvMU&6i?a7t%!iwnoR;gG<&!L&vA(FVq+4S14sjB^*c2n5cL)|ttgk@?$d;}i=|
z?Bz3jSZps}Jeh86f6xjxDD=ZG6Man=ry#Y~*RXh6-M0A*XO{`>LpyM4kYhTo#zF~i
zUWxiK2mk0I{&r<Mq~@#Vm!^L_B&C<ved_%US8IwdXp(dI@w|5I=?NufF@d8JBV+A7
zUcAHgA}p>({;XNPv*I^WGW2TO*FOUYp0)v89Zb?PlAD#S{nWk2ymrfHgh>;n8J|2c
z_FUS>Mcu26Ud#JAZ!QDmt!!4qaIz`#y+_#DF4pfB)y|qf9E5H<V->g+9Ea!bu9cq$
zZC1;FVvzAzQP*+c#blyoAg&j*Ljh{L6Gp5o9}ga5wDCV|E<fMHAizzyL6F79W9H&@
z27Uz46`Oq~I(YHCWbOoF>G0F(&2j@}?C~dOm0^5*Hr98=dh61_*d?lMRQfmN9V0>D
zr^pD1r>Fjtg=oJw3%7&7_ImFu@-|(5{p3Z`#rHWij;@{N??M*U+tfe(<NLw<y|%vF
zqEs8>48cza|0F4+;!5$F`9&JIW2KZvF+uN_xr2{MQYKf`Q?9625;~t$IiI>~kyuz{
zIc3MW1XygpjwK*(0S@el)}le=5Q@0chM7)_Tk}cEE!QusF<`0k*Nm~WVaQbYZ+YYB
zL2&;WEG{`osi`NAj8_#=@m6dw#T?{Ie;xg+e}XOi8Els|`PaaK>Mh<5(UW{sPn3qI
zB{mK)2PmPP=dy|Vt<ms_(lI`i9Ez&;kh6e%KTJe!7E_1^l#vf>1Tn<O5Fk>9(j(~x
zM%nL`cHWEYU_;oB02(I{^{{f#J;^OTw2%-&P=brBUD()UyMJ08;h%Wof>}>>+HYcO
z?xESj&dz0_H)+02+M*i}+tIq*>?pp>7SWT(5VL_1kq2M5TaNT+`>w8-TXPYayK+SV
zeUv?@H&O#SGd4kN(<P}wK~%Yo*;}1k-xCkA8cklf?4vVXfqZ>5=YYpFQ-s%;)zIV~
zc&6b60nx!AFnd@hxT%bMr=3(BgRYQeno~Qck!&k?i_oqmHk@Eh9%;dId&8q9Yf}S>
z4}wfxxTL9%G_La=nPesTBK_4KDi@IN^gRVjgn@cRYL-8mlsE9MNtS~1(5_>sNOV4T
z60GqbO?<e0F<i_TKpBJz*H?p7r&xfy$A?`Yxm0I>ACnQ0Cf3|!EnfjaFtO?>S=dyV
zC5M2P+%wMUkBvV*V$y`l2nX<<x7m0YP<^bbJ{f~BxS{~17vo%WyKr=>6wK-bnp#Dm
zcfkCg`u_Q=1m^W6aW<g~EwC`c^Ba=|3S+FZ=r{Er>I`Rw0JUuQVXYmju3iKs-^_KW
zKGt**ZfaUJc<M^n0XuZp9Zm=fVQaOXO(mbrXTH}tPlfc2g>2McS5w8@Eov;F{>qip
z)n#ZdQTXOdJu{uw%vQXXSTrU+?-@1odIWf=9biw=jeD`EU9T9HoUoqs<piblXm1~|
z6Zg@$F`UAMG7^ryb7W0<U1`R-m!=LKwZ6s;Vi`e&)$xdT^GtPdwI^ECt~f4wOTU@o
zflkr}SZEa#6GprZT4B3hs1UW#?5v_i=<-XOCO=o{jD%lFwO26Gj<^uO_d?pMQGxN%
zL@|q1i_Y`z+>S2gWIxXuWuuU_@pNO-Z1+{Slsyb{XTe8S@4l&(A1qK2W0e(yUjk|X
zRV!v?%WT%)wfq(emKehXN*(f}8K<#Ms75Ss9^AK|i5|o<2@x?uqGwvlZ$+9;0a5OC
zZ>^MraY`RpHcZC)k`H`g`6d%YpMW2X4Znk!cpUgmPqVjBU_)`oF3(F7X3W)?KJDd}
z4lg}fvB)}ys-2h1?MTNRvwf15A?pdsUpNj7h6z-!Bm}|A3@<{5pCm71ZY{R%5|(0H
zD<lgXJ;<%;#WIk_phHd$Z4aqt2R>>qxFzbW)@3G3Sz2mh%V^eA)cV^YEP)F2E&f=2
z4ceyP?K2Ur3-=epy(7mb=b{!mbo8)xJ8`sdbhwq8tl8+4Q5hp^`z=s;J9U@ws;DD#
zSUP#duVUc|i|6);M`jQm%%@Nyiirj3Aw($bGC}g7Hun$*2irZynLfN{--O>~!l|tJ
z+=;0m<xyn3bc&@lKSr=ff!G?R<WrvSzMk*!sI95@w8j<_Y~UgD;HRY5qCs?ynO`jT
zFu_Vq#qeoT<EWyKQMu=FUYxKc_G1O|KAas3{cSM=uF+sqsvv1e^`5bzY3!RQueX=?
z!}HJGiNl28o(ay8cLO@Aw>ODP!OK`scE5n^%f~Z{q2BRoEvQg0U?m7QV$RTe?Hl%y
zq@a=a@?vIBK+W<RyMa;yJx5<n)=3Ow#d`u6Qa^w<2o%qGgkAZ@H#mW9V<MSIeMc{W
zC`!TaaQ{guGt8Sil+-Sdkhlj<o2cThmjLIrMzk#)dnFEiaK{eHn{Js`Oy~_M__(PW
z)|1tX&WI}eEY%GG%wv<$LWFVOxzOnI!)<;KKA~j^$ZaAZeo8DMn)Jd8d?~Y&CfttS
zgI@HSJZ3fP(kj)`Ih4%@0I+6HT!QZSX9m3=Lf<ORYWh7$B$4#%oC_ec!Bs!;=p?GH
z*MdHdG_~j|1BI2zEiK_)9}_@CSxrM*LaTmN$p*5)Sa3xkO(+%pduGmvh7^i{rXQl;
zGdENa`ZR@@FXXCF6Kvv2DsZzdc|j8sX6%xf$o2|g0z4?6LKO?nEX3>lYk&hTwlhD@
zA6|5QPa|I>K1sgEEi$rCNlaxVNR;N;-S142Yuvc9wN5^oDWi&T>GWUdCa;qY5eY(b
zpWIESsjL8|X|x5B1ukQ&NOi1<8wesv)FL!%kWY8Id6)H|j8csx4?WTMq+!n(&YfXi
z!cMRpii=F=9MgsZs$XnhRS-jX*Y;3rbLO2=!^VSt8_Z3>Q98+{nJq(n12cB9BV9P@
zZMI(>__CA)=y0*re>O?FziXf-P{eQCz}dGlaSFt?!L*ite%dxYXc7Y`m`a_>8lDLD
z0VJyQ9E-L35v<P!!w5-BubN905YnI<=uS9W->an_%AFrQ@Ysc8k>P|MKHYpL7pM;V
zeQd=;H2g-~Mg^edl$@I!&!Ja3$2Pf%rsnBIT4*EJQ=TaV-Hd7Dn7%&I>LI#<&AkNY
z0V}YAg*11r)xRYuH&lJx9wDh7l?2dc1`)ub)PM2#p)-(yyc!(R<sl?t&fpFKBGua%
z{ri|<*&2ric?d2dYiM-f?lA8$kNhdpmOT{1<n#TOtmJLstQQt=jP8LXv0-J0pWM$^
zvdJOMbY4Woy)y;=GJE86bd?kW1<uqDw91SSfY}`1+F%n+kpS{@)o&erF(Au;FP~NS
z>DeNhl6ujQSbK~+Cbi4ox4h<ltIgSPB5!zpX@^D%P^U#SCs{8IdK`cRU=nH%FKZr1
zeBOntASq4tFfMYr%EKyIjUWBJqTmH~UvWE2+|6!-`p<0R6W41zIHC6!OImlJ*c}D@
z`__18xlBG>x6R-7K?Qo;84&6v|CrqSt^&Y{Y?%t2<B&3;)-=qt*S`{lX^0(r2(G^(
znxU{i169Z>Av_80$lwAY{(I;Un;{vDO|AMLWd7Okpi~OBo5;}cfKbe>&?Bpw3KbCD
z@eSEc)XneM?X{<0Zqyb_VTnMY-=we#b6(y}9h1<VTXV}fk{Q3`^TeYobhQWFng=1+
zxp@G~CH9fPqI(No1jw2|H2Ib^;m1~tU%&hQV1RuK^*k-W(~FYGLZOG+lhVsWI>@JL
z;eZ-8XD)7;0A^)qNR;?yh7g2@Bw!Du&laiQf`?7MUom~Kc-aCUKLiqGBCT5*9Y!t`
zS44C~s}kYyQtd<t>L4K)QYt_K?jz-|9s+>*U(?la_TQ%=nJ*PCv0O?H2lUjKz&}S5
z(^nATA!N^W1Rq$`kR^uqoWqGB0sL@5IOG7fvX!2BD+9Z_I(CARj92jc8Pz#&qR6Us
zRwykrurs#UKwc<GSGSFLo)I3(metl)Y-%7rPN^JWEU@D5OJpLt4DVwozplIOJKv9M
zD$hqfVE*}sMjVWY$0kETkrnw@h>s72=`qod|52Td?opkMzFB<`75K}JY?Pv%K<yRo
z4e_aMKL*=VjlP$p7#w-h^ur_6)H{-M=Co!{YOSWM-HX~?9)5G5vwEz`$ZwVr%`%;A
zB9HU)*B7T<9l`C*0^QQi(^rp{@*t?3o|=-f7TTp-Xj2D?=zy0Y?R>Vk!FN|9zb(?w
z&NI+Pwn=2vb757$k=2URCg#hcE;_7UK`v9mh+NQF2&&r@e+Ub2259ijx4eUv&d|z}
zn^N)hSzH9%cH-ys3m*pKI@++54MiFZn;&e;hfO@(e8Skza>tT>t=`_uTfgc|a{-uX
zH(IiRB|zn3a6+jhcl<suC2N3jp`Rv%vBSE>z$-!9K=TtYA??V6>pt!>{cs*+*^||b
zk;uo`+_iYJ(p7E7D{`$ik%aY-*9Yq00cNW*Y&x6*4-mL7$bHiMjL`7=lCQnX&gVwI
zWG~J(dx#u2tIvBhspn!kG2a`?L7I$a$m^lJ?JFnOi1C6w%y%v#&Y$coc(XT@XMKuS
z&M*B&KBED}S40ZWCXMz@DmZzYIJLUhzHfCOWmB`DR~o6&t2){lVUhGQ!|c3cWVC;m
z9Lj<2-)fEz892>ug&u}6YFt_wjS^00ppX-*y9uDAHNvwzhdpHiPm5?DAW_zX;>RoQ
z+}G*IZg*(!+$VWtw8I{_YzQ<LBUtE9Bh2AxAp^__A)(l69=OpHDX<VA8znHMK>ykl
zMyisw<v-#2IX}Tt^}BC(1<yZ^a#GuGcGEcIW5tI0&Fu-YSd@Nq#cpROg1+l8RfI9T
zCY`mTx@zspi5a`lBXAZTNpGDEt4&mJm}*tnf4_>Isd-!*E`1rBcCF0!a6KV?p*XA-
z(r*ZCF3O<|y%rXUjg(Y1(N}lH64#yZ_Z`Yb&8E+Kl30Ceyj-&<nbVTAZ7J59Gldx5
zBM`!-395bfHvT75`BYXJl_Z}NE7~#T8`i#)`X`5=n%3?35=v1L;`!^ml~vW6C<;&&
zqg$6%)~wy@_Zsr0Gzh`o;>=c$-EY#yEeexBu_ae-^55zb3%^BN6^r(JjhJkKv6KdN
z*-3h00>;>lPSBYnFnO<jF}b2P>+fHLLo_9`(lBqz0czuUuk!Fgv-%~wvCG%`13#AV
z+`1W)o7Hs-A`EY4LH6bpUoV7osBot~2Js}D8<VjlvCf+0kzvVdmpY58^73(Xmc(rV
z1y9D9cwX897>$odo;o&o{FYxNyId)&sZk9BRg!$r&B~`X7+KrmJjw|3*W2Yb*GRR+
zWi|b$5!9(ClrorvPOYDz#kF)Iq*LLv>{~1xyv&gr|Is2<S#6dU(MstLOPWl{eD6_+
z%8D^%vXLQaOvnwc8tn4dAXjvWEwG0I0a%pin8MrVl%eU9#^P)@Mv84<ed<o^JSntb
zG)h7Z(~_^>0{Bw~yL(G|Gq64gTQnNRLtf!5+*E$#8b7i!iHw;|rm5$*wC4W~087_-
z#EaL5HVxrN<eV-(dFHh8>=OsY<&F-X+UqxrwjKLF%2{|QaEGN>`O|vhI@4D{Kt3UO
zN4x~AfQ^id$Crac)m9!>3VK<F#b$~*I>sL{->L=hVX*u8ytiaXT)i_SONRy-PS%J<
zi_o}z{i?X0)wRb}VP-2H)L`z4=}m1mI;~7@NXDyD>&RV+1u5HpUi_;3jk4$El)&n|
zP^jihrK?EQ#S7|sw`S`qyyyV{a388!GaSJF&g~jwd`IqR)eQ?Un}Gg&JwpAk_G9a!
zKXIiVg<xX3`Dz_xnd-=0o6qpWvJ?i{vw}kwT}v5JwA?pv53(p6Z?uV;g1c|&b<DI@
zmYo{pSn%x~ZuMoU>8sE>mp1-y<B4Tt0Va*Htnuqk2JOVcCE(Ufx*0<upwS6vmJ8;c
zf12#HZZuMs#BKRi_}4$FbWQxeY`vsZ3#mIsrUe2SE2Q}YXO8aJBTmlRB+K<J=EJ8E
zNsQBuZcvvH)^+zQ8g)-e|9I=^;-uo(k~}jC0vQdu>S{-KNJWaxyW4MCO+g@9>&hx&
zS|?EvI7Num=r}jG=`s&K@II-%ebKlkIY~e*?!he;lGD9gC#7HNsCX!G88q#rfaPfR
zIwZT~3$;7Pbb3BbPr>ow-1Sz!Scrf#Gi8+5g1!2oPq^fERt4#9w6}JQi6)n_r#mOF
z6O4bol!ZKJqoP4LCoQ?pmLZgqEIIC5F{Nf3MFEd^D<!kp0PTJza52P&)hG<`gH%d~
z3OjF945#}?$x&k8PTSkPEi5Gu9gN)Syo5E@t(g2>xh4DphM3LrMbvNV?0P>PARAs>
zU5SS2{_A4ouF&s(!b4@Vxf=WXsy45iQtG+NT25b|m}A7^_yo4S5nv%Z)Bfsgu(GCQ
z=>iQkkD|_Vf!@*50?7oOna^Lor^TvB$lBHa(8xN*xF>8e-cJ<|TBv56Z~4y60j?L<
z%^&rtVtN&x7$6_fAxxv7bsA3R6pNkWHLGV)fuA~1iobg<sjV5>5j8Pr5aN$Q=u&_$
zTG=ui)V_{z@jw~|ATM^;svGuRSWdnn^A1VIP;BSHRyqgO0|TQ3@V!qtOACZPuv-`@
zv#YfudW7LmLLwqzZyhWqHQG(<&sF(EVPhInCHdepycozT={#vXT0+C;VZS0~Kb#g(
z^NiyuVyBSya0!fCmnn?P+`|c?bfl)9$APe(NtB7+8TRtl<^fdONz+A-N@y%GB!BWp
zMEe3~fnEzfu&>VJvq+D#uW91O<4Z~W^lLdGoy9^WH{GGX#LC0{O8@~>%lVpl@TUSX
zhleIH2Z1K>H)<JY<wB9~rS=iTJkD?5DE$&@t>JI_I(Z_eD!eh5gGnHbH^T=>)WeY6
zU}R-xv|$i<Y1<>B__FSul8)!Qcl}ouUA*P}Ev7M$_dqJF^kZodTcIQ2UDM~bwj?g=
z@lkeCh673sMcJh(*|gG$qG`Gw4QJt^D}&8}A9v6EW6#*E99xT<hJU|io_X&>;L}#(
zph{+wL=*$H!D~3pzSF=cSex&9&aFcG+FP5*DP5pWK;{NZzU*#rt1m~L+diR-b2g3i
zvv2yq&@PZqzh_p<?F)MR=)qy$mCX@#w*J@iXEABPrPS~4&m6djV!SXAqi89j6t$m?
zb?a~H_N5DHc;QSrl-fAME{Ov`15<&<SE$Ob9Ka98NYKrulpTt)Eq*}1HXH{c8?W<9
z@$G5NefTKHgViTV^KRiTv%9Ae*Qo)Soo&FBnlHfafyft7ZteBt((pzm2vNVBBt`=4
zr-&q4>5!fz*621yQLKSCZC|9wOYdX8qM<V%Ng|s;%nlq(u0agTXj09c3*iVJLCkr$
zG<wVVPhrc9KpUk%n<0rHAVrNg!9)EJy|vXSgzlUt;+lVMEjG9V^Yb?0T3o0%6Jx(Z
z5C#w>-KP#qXNx=S`Opku5aAOj8YS$OeXcip>E$*-Si(~c=nK9fBHhs@tO!lhD2~=h
zIXdHaF-w#^N)mnObX}@vkxyQ1ZAB5G#x`R83S!#qO<zZxHEAIYJkGwZ0!GE%fbtR#
zZmkyvC;eYn^WPV?*+3%>s5eRtZ-D>YS=qn5g?svx7C@Z&C@PbvW8b$)s3NH75ziD5
zIzBd1#<e^5Hn0ev1R3N-noLXNLqrRL>CeWNe#&CF6b}WZh#})GJvtQ%J(xa|`f*=N
z{kX5CX^8|yqY4O0h>M8{3yMjI2nmZxiV6t}3Q34Q$}~TMl0uRqLXWLylHz|RJNk@f
z4i-$JCirg&W@@le5(~IKnHsE`M2ngSZcJi$RAL5zZ;~i81aZJqNyHfcl3Er>rbNvI
zD<zB3|B+S3d^}V0#~2>%Z=pZ~?<520|LP^9ppgE(`q;`w1M{b_(f?IGLqU1|KLjB>
zuwDioxWSh0kGwLtHiek}zfbch{||YO^1sN>(aHoLyWC)UaRP8$1~E7zl@<*}=>I``
zj}Rmc4}AX!_y2+atbzwFO(O<trxDZtO%g;wdGSB1U`zfNnTP&wxE=*X=6{H0ivJ<9
z^XY1!K8i_`qu}5?2KFDz-|v%u>t6pIC#m(HK>+YcDlzy=8ZFw=t4D|h9H~L@|IPE)
z9Z*oj|4#DQin06GpjYV(VBQQo@IwYIT7w6eE>i)G+!OpN^95R_C%80|o&Jvxa6R(>
z81tk3EtFp1txPF(^8X<Jx8BD3H_+kr?^;&_>yI)vN`An<`>2nVt_J46Smckl{D=Q<
G?*9P_?HfM;

delta 29809
zcmZU41x#LF@Fni<PH}g4io3hJyZghTxH}YgcZaV)p-Ax-D||?C`EUvpw!i=GCY$Wb
z%e%?T+&MFo+~nNMoR<segKC&G5?D$o=}Zo2()0yHXo<`+Rmw~Y=nv`dP4tvdTu^R~
z*3PyrY%We5wgEm~mjAvU?p{7@uC_jwHkLk?a#C#n?MR1UY6MgN`(aBh{DGRAlbe@~
zi<6C;Q-_mJm`CW}%g)Wi#m&vh!pSMj$w|%nf!f2x(#Ou-%k@9|06~6peqJ^eM>pR9
z>QD`3L?myyVYl=&YFKJ0#>`O<8dy0g+W!rMkO2dS%=e$kUg)zyCOQl?H3byM{}-jJ
zyUo9#|8J0)*xVc)B=E%0DF2hgKgi(FOjd+DjQ{*F-&Z~o!azY0!enAWlfjZhQDkCy
zQv5?C`G-hK#Ldpj&c$Nw%f-$0pEa$DL=!3$6clPEwi`Jy4iw-21N~n;*gc$G<fOR%
zPZe?7VF_TM2|Ebk$^KvL4psz7SU7}#ZZbWQJ5iho)CRZ^p`f@BGb60XU`3(mGE1#U
zGSg&8GZ#FO{zXFdpF6<F$Y{>kIJHoLAZ)%xG7MRv5uOOmjfv&L_Q8@bpZr4ZWzT?)
zWZ9LUh-~!4QTESOuo}KjapV_2f4OPVaaZYL<znMd-};Pf$4#TFTlR~a`liAWpZhk4
z0qS7$tJ|g6dGCA?e|OfzHtmhd$-$Yz9ImtCu3y(Rk4JHc7n;D&N?Bc<)E~vZ=0E8$
zn2jDh*p>Z0+kGxsvYW^7{foT)uBHDc?npbuy5RLz$bBGWzJ4%3bn7QN%g0QD*uQ8%
zPm-^1COMJu&BzQI-~8rp&C}u6Du(gvcVS;y(=HQvvUc?WJcD1OG4rpX>X8SAK}uJN
z?Z*j&q0osV*xbzkytvH^#v!UnLVZ-6yqkG)oz>X%dmDyIyJ&G?$RBSrU5kDo^4o^_
zE~sUmG7Tp0{1)-7%x$67uP_(W&TXjs1PrG8X)kmXL`Y<;_ku<t)ZH@xmMF4dsCC|r
z+Ejg5&g~SfQCJyJhkU~beE4JwA?=)MI5(8ZvHp89|6>P8Y~1WN-QT7@fPKFe7rVF=
zv_AZ}z<#LW;m-g^xO%eHh5B(9b^>*Fh7lD+wIGPp9_O9C8?9aHaC5n((<tsQNu!`w
zYv<2sl<3{{z$as+>#r9qUVK{J(J$@hxtKq(f40BbvLim+Re~Yw*BXAla^Py_i{%@_
zsBiD#Cea2!5|~a;mA8G)P4G=w4wrG|IJy+(>j*gnQS{D?x2Vu~YRPw)hZsmjm)`MI
zGID24H6Hc^6qt&<oId39c%UJ0y((^k(9?>x&i|@|M!PQOzjy}T8|d`Wp;}0&DKJu?
z?auF5a0bO1D9xMxU8{VdW4zPI3hZ#XX^brwfm;VMK7I<dnhoQsqJg(2S^Ka9ZKrV|
za&Xu;SUT{7^StgDqxPWmsyAo2qt%41E6(ZoJd!hQ4qb!Q;Tw7F6c69Rk>bf_0rTY`
z#$}r5rN)7Y>V`$spv$f00|TS{D@!cwt$tpTs*%`XcWYR_zTf<+Cj#{~R5qb|w`A|x
zPE{*VUe4KxgnC}GW_V+N&UIx`*G>NOd7<cI8Bkf8_*x;N<bYSen<15@M!><>*i%o0
z(mmIqmh8tCk6>|<XL@CNJw$o^w4^|mK1;lOBp?4ni#+yZ2^zzZi}UYP*AmJp-9ZmJ
zYD)t>m@#OoGlD4AtDi5Q2PM7=s&$R)!lfJVdO?KG+vabZux*V=VosR>YGe!F?~HS9
z)1cqloY^ktW!EFhXWp&WpXwY>?aFE$p&UPszaGdRh~ElO-{KbhDWlK`-DYT4)0BA5
zHJOKT-wW6QNrs$b48EoO{SIR;Ts7@#0XFjcoN4zZ0_PQdypkW@0FHgRphBn@XAT57
zao2%SfVP9qQsgu%sPyb)?ONz_syU0d2PsaIUP0VfgSpy){%SWtO@Ck6U&F$l9(e2R
z{LFNkpCJYIIv+F0#B`ru1*)_1QqDB>?xdG!47@ktu@zHgF=>L5hB@h!vCy?od*Fr=
z$6MZXQw=pD11?$VF0bY!Rktu14t0SmnW0!;bY(q}>rvSi>~RSsU${@4ZYs|68%e^E
zd3FzSGv{rVE2G&p=tXTA*cJh~*k2Ql<W$JujrtfD$zSWGk*=s?EgHaa<Bv*N?Yfet
zX?fH^_*3DAG3_$@-0K^EqpPg-)5*f~V3Af>R2bFYv{)cC&BLv&!An<&3#33C<D~vV
ziwE6ySX_Zi#Qt{^!Z8+K&uo=bs{pFJ2cdIG((qeGf%S5BrB;}iQ7>`sVr*h#*GlM{
z`EIc(kDSl*j;r{;coct6NWlt7Hk9RK(GaxlvaVT_wJ2sPnd1wzB4QMvl-a}a*zH1_
zu5qZ}mpkdOHOol;E~TYdA%p{xVsG!SGsYL69AHSfZly<4s3jvR)VLFg#*Ue-ij2H$
z98lN?3l}HwUmAKU@QXLTA&v1Kqo>eM_vMWEd>!6HZJffj4>bR=Ui0GzAyj(}vh{Ka
zLY^SaaL$C`b(*PrgquLn+!q;FziH~I_yfG}S}u>tw>Z(vwy8%ri(XPddb$8r{1f$x
z_hh+=<XLss<o**J&5j^ws!$C}2$r3U=q89L(Ac7lC+UERE2&szNfPU{qcR7^?fZFF
ziwsMm&O_yk*h+{2B(xn)-sfkoUNHX@JJozSQtn#q4%~8#xZg_`JW(&>?_|cAwcrjN
zZ=~RcPVjpey59Csh$;v0C&kIhq($ST?C5V49>wz!f^BMjS4zV7E`u=piIF2T5a*}$
zZCo`)=jV+Tp%xJ_j~$0JOWK>4@A7X7p>MA}nc<scP`5ldmjk<b@s!fCNYI4ps^|<V
z<sAZ@`hkPj?&VZ3a5Fm2p??pJ)cfv{e%-X}5EMysJJr}bP7Is?hM{n&=%GgXoztl`
z+!9jgaJBsBBNk<ugZ%mdprTR)g5t)3CS8fCH&JR^hq?XEve@*m9LB){Ope7jvHl%U
zhkCy|?hSrCE<^_th++-1Av9QxEG+r+GTw?KDv7TWj3QMY5HhZ>4IZY(77ub#`Yb+-
z&HhB1Cy<42W_=(7raHH9`=dopbevzVn=In0|2`3z10e(6b@o44roCs8@nFU(u0cuY
z{AmNy8`p>qdD_3aCqnD?s6+(NDT30xF}g8o=&@VuT`h}3d_K)l{msB-FQ;;`_>wA8
zs?il-Buwk;Y^jIIz!%Aq?9}D0FriSyjvz$AAXy|<#g?22h~=(HiGR^gW;*>4K{)#2
zNJt`}U;c2f+-}q$Thu0Q1jUT@*U|lmbwZn*6%8)*rvbIe+vtxeP&#}I3XRId53XO>
z8Y!2xepBoS3sbJg!ujQ<;sus>8}t|oU406&T|^Ft;DKSpc&x}q+}AZhLd&5(Sgha*
z)B70LN@4$aU@n}CK3vm9$)?D&ln7QMiB7}D)55LGIkC77HfB%})^Eil6*khya4<tk
z{P+9?-yzDcUk>jwmm_=G-nQmd6*`d`wyU7fSA6)cInl()%_++|3$Hq&eb-&k)Hx_k
zDlAYxN(8f?!c(RniEBp&y?%$9_|sngel`(QXO{m7AWD~Vf14$k?~+t82gZY)L?zu9
z4E(E!qMt1c^FwF&P=O|`R7KPZo{Ph33Yo5pk^rYvKrSm_2J>j@T1G{fzX$$)<IMZq
z_gXX(vFAa`a1XZuuc<XrjVdU*o|EC`Se7#GA*O_e+94pT%GG`ciUm-+vQ)Z^&t={F
zw5?16C>AvuG!G9i;#Sa>f=2zn{p?M@-lYZq98L_eqcw|vq78BVW~Jz}PC_<|ZS6uR
zW(D<0(5`Kn6-P6%F_~adXqdP9nwP~8@c(p4(=$?XtwxS~tBUy^PCkk8;l!_o8nI#9
z4;QXMmn@*c7bnJ(jLz)bNPD!C#<yQr*GTOaFeu%(GOA@jd~K><o}$-vIe?b1{(y(0
za&_G*lf8WBGu<%cS|p@``Br6<U=UO-PGRP*GlBb!e=^6q)W4jN;mcDoSFNwt7}r;q
zuz3peS*<BWw~+pW``-~ImKMr>`*rNp(J(#@USUs`Nshu6;|89Vq(e6v)O!j)N6iEC
z0oqOa-eKxd{!`yNZot^~He-{SVyL;@uuc-?zMTePr1Hs4O3R6Vx%*j|413^5V;-r}
zIen1tr(ccs0z%ub7>LOG$irzOyx>zOQYJCp<4;HQfyE)Tzf>+rPQ18uGmO!C1Dl{F
zbnmtKhMuabEl5h)W1dQCiW$G(f4@W304UZ;sZh1df)Pf2QdS90952N6U$84Py<oKY
z^X)421H@qh3gpV4ENVX2p)G$k$^c>&_M$b4F69{LN>2|s7gq9fY5wF2R1l7NyN98U
z?l|wcM?*RHQTdvo-o3oBHLHi_{_w$GeH+%R6+Y<gpqLiE7`OIpR=*tFG7PZK2ky)m
zILueNE`7o1T03VgsLLsui{NA(XLGP^s7nH}CqY8TdCLyjdDY@27Qk4bv>^8D=jvn+
zIi~0t)O+c-Rb$M4mjx5yo5Kc+EV}?tC?lhCR7GQzJMlEp4cHjzi-(jOO7;h7bJwJw
zW?<q^N6&YOX2JJMnS~Bd!+Q4)K**66dcsLvo|MNjjLQ5ZQm9q+{7w6yLOrWL=`c=p
z5b`h%o{VnTq};bEFiO&IFK5Vn^eD||bIY9DeuLGR#}IE=R3f3TLCNmj3e^b}PF*;Q
z7OJKY4Y3mtHw4)yRlzKiM=R4<FfN-jt1^n6tB8xpz4Lk3fz{8qEIeNX;2rMeSC0K$
zKw^Zmmcor;zC%E^H=_EAk6CGsLlv!(Ns>rf`DTQQP3a$KLH0&`Gw)#YY(r-0@j>*6
z=b&Xu*5sOm?-PHRk`j!UwB~6*;8zevpHK8Cs>0>5u9th{=Th2%Wxi3qtgzhL`a||!
zu_|<gAh4;dI<+<oKw;-0`9^CVv|O8OT)DI82v2|Jy}ujRV2=}cIi-9wJq2<a)TOK1
zrdypUQ-&k1_~9M$c=-O`g)M9Qss|q(3Tl%7zsptTJ3$1{WQ3fjGtp)DkY7S<0i}t(
zyTr-^D~*{LiH0%FP5_Sr*OIihyc}v@T~+gFM<&13H6EJUi{QV{&P<a0H8h#)IfG-E
zmj3YfZ&}>A%cEdW$d!bz`%d#I{B>xH1o`s_uv%BIUGrVY*X^^m8wI!3c)aeJ0Hmt$
zm7!_frw9P{tI?f*G(ET*^}Ae5kH-_>-o?x0uSWxDAyRrRi=@gJ2pMGr>g8KxC!#g#
z(+PLofjIbwHut+4%&zE_!bx4bI{nTSGRBMbOD?)b6MH8ftO<Ie1ntx;J&4r#P-2Ge
z67nz1-U&<MYwqwpf1*l1tR^7D2`7~putxf=ULFHYU(e~jCU|iE%2s07Oj6b3#aF(O
z3~`QQ(6nyWs|+04M$fNJzwAi@*?gerk@^ZXH-;o5Ew`j(pTSR@M3<?@OpzG-Wl@|a
zM{$zpBx^pimal=#u|%dgu$?+EAKunbjvF^wix7vJu4mU-&=xJ023sN7ttZ1#(a&Gx
z`ScRtmx}NiJy!jSKx-}g@<nqd6O!kQ_b1<hh}>ETbfZ{^oq|9^EHncp7jLvX{8nPX
zZ7WmSoQ3^mN0QfJaqCP~JH4k<z?BZurJpvBM9^t!j#nTxi(%1%qB-8*&Z%X>Es}}1
zpfHP{{92NmGp(c018=WmAzn5a^ybmJL(~Z*=>V*_$crxC`_D<LPp6#^v~mRuOyyN#
z8^ti`mH0^Li%v@s9-XxXcNcz#mj=w$zZH|XkB$_t;xT>E3{KusE-SOA4Dw{BKI5O|
zY8e|1v>hzk$?s<Tl1?Ls?Y3qq$sszMYg)>5V6N_<uCb~Z+WP1iCHDajCG%Kv+zl1@
z6wzsEDG|qsK|mb){cB#~t=?}X6-wB9EAbSZ;8I8B4Rim;YMZZ-ix>~any@!thNPFC
z+aIJEn8<HwzZS6EMEm=<pvY*g2q0wg###~6gwteBgH!o=ezFEziZ1L5-V*7;KL}lL
z=6o59@=90q5AHyuvM)(l4Z=apNAG6<Ahcf#GqdD(x2c2fX)*YJ9<teR7G1{mgs(=w
z4&~PtNhjtSpGX?@msFjsK|RMsElv9RtZ7vR)(kdb1;!r=c2JINV2D3z+)hz99q_~N
zMU;5JiMUh=e<OZ*J*5$xM5gCh?8)jA_EK6*3(ON5idH$o^AmnAzZlPOCC|J9lKL>-
zJ-k1D>i%)!$)ezmTot#M!z10-^nmMAVw1wO)VG?M*LaeLktZfnWdHMb08Z>7-U$!>
z0}m0A5;2?Q=ifAANd#R<YL2r_r`OaKs0-VO=mq31ZVBs#AGU?Izq=s7I6euUxPuwJ
z?Fy&I_uFXKn|ekcM@Rp9K~><&18l_Sk06lMrjbj&__vSn+H*OBC6x#TW#?{d+Y1En
zrYb!81e$a|O3tv8tE%2mRKDA{Y-aQQD$<;OgW1ipb&=cZ(WB37qeTmiC8)TSFgqb~
zZHl%s;8p#isXjc7PIh_-(^Bm?x+!|zJV_YaZ)D`pD^iBAKaPzAesjpC1&k9Fv9_&J
zIH<^;-BvNqbp*ogqzllB)D)swR=*sncn>W-k_Hn6(~jWT1!J8!*yT6eR3~1rwg+R+
zGXLTjAk`M`u-6GivvigHn=a;qa$%Xgvh+E4?+P7m*(=e$aE|USX9}+T#3QJ7**W!a
zqU-@bZ*U+~qpV2XK3%BX9vBGp`BZX}_sjjgAue9%aFCk1J$TLWsbkuYVqIyH<j9Fw
zBmFy;_&l%DA0K^Z2mTR@op#F27bc#!XLxc=Ery3dUdM^mph~}@n*MyEZUJc~xeqwA
zGBy;L+GZBt{{(4wU;OB^P%t)iP@5~X<KI2+R3%WJmtkdmX1s{i5C)LgmfR=;wSHv_
z6J3bR#!MU-VT&N8Z%+9xQ&v1$T_hazMpvNjLEhN{J?NS6R}EKUUyY<496znU`7%jj
zlapDqv)i&F*2Y;Y$GtTk{#wk*f#hxIjy+_0<ej>H$uzJlfxI7bU0hfC<raD;!p%8Y
zj5>K&%eGMk(^W!6ssNOK_3*@OsaS|UW{^&Aw1%mXGnvwcHZZic*B*KL&rJO*NlL@)
zeuDhvIb^somrqzE{h7odorOPjOiM4<$0o}*u7Y?=EtREzt6ZA86b~cT@<$%&BWdkL
zXexu}xn~q#txh-q!&DL2Yk?K*@q$689lo#JKktGfnIKJR45Yfssna)Y>w7cDaq9W#
z>sZD#Y%V6HIP*yu@E+nf!rDg`c8u^p_5CG#AN=m}QqY=PSg~vyx`>v{{mVph^84KV
z97wNNLkqF;zC;0ysWq561ZT5mZ#<(<EYwE&^H7^9xp8uS@$_-YID+@LH{R6rzpirT
z+*l|g#D#1eFTi*S=xZdN=zFHCc@TBsY!p7RTJhA7LD&phgzHr*y1~Ve;Kr27W>o91
zhl^d>7<ra{vDIx{1;dMU5A|TOpIZh#!|s=Z4|s!<=K&vj*%s`pHV{qBSJ^Y4A0&U+
zc5T9^K51BW2oXqYI<mjF2V|W2>SFF^t>>KON-vunbOLO?o8AclXGL*vMKI*LcWV5Y
z-p{%9e_TE7Z}0Di*#!#Dehy(Axn&NW6kF+)lDnShc}Fh89-`a}KbGjQ1afl!)SlCX
zfBCKWG{-G^Pup)gbCozA1k<Z*v^mD8;A5A&%xB*)A9kn+r($kq9XP+R@zuJn^&*$<
zMDeb7ogJXBy&wBDZCu5G?UPGmZyX?jf^VV~JQVefNQu0Aq4B6f*ecE|jx1VTk$9{<
z{j;c9qPX8#3cr_!p5^#$dD2RpbIyjosr(tn-5H;lk_4?t=%LpUXyw(}^LvGz^g!q}
z`7D2{JsFye=j1Kz^BfJCVjs8=_hX41X0#F+hBI)9`&_fVp6J|moG4!7=v#_2(5l)n
zd8kumqVD+2tHsF=^(`konaB0P)awI98v@wjycdV*G%sk#5MI;OrFShlbhDAFF}vGh
zv`g)I6hnKr1j+E|Z7HcX=P)DER&%jDlG7(E5wA5zK`@@Q_)N2`!2r}*wqFD#zqnq9
zmkVGE3{Bt^OBFNBqF3#Gf4eIoU-7+Qs_*aBq)brGX>0;LT<CDi%Ev5yKeY4@<ZiAp
zc|2(puW9Se&I)X8j9f}G5wjs~6mK%UA4#Vo#-ceRpOURer<&}MiRElaDug*E?#tji
zjb>>3HRQRj3*Du^*4)u3t&5^fvBG`;-vp4mD{iZL*Bu82MO>Aq3Y^8;wDG$;)DFf?
zTl(cEskP8h)R{MX<zx&yPx%V<vsc|R3x6Z`a*yo_T+$Sl7Qv5Eu0&T@L>U--fE&z%
z(liU2<2B*cy^gIw3fN7v^v{u1P<4tUMR(w`FPKSSuWneUzz(a-@8@%O<w;=ENCBjr
z*LVBXSJ~E@Uj1_Rqh0<mkg2-=LJ>KM^cGb1{_OSDRq6Lif*nF5?G2v5q1ClL;Z}!j
zv8HgQWaqrow~1ptCqQj+Oy8$KJV=ocz01dvAfhRr>&VF;7{bhH<zKLtWuz2t0cV)>
zx+^e$=U^1uP>d^%@4_qAF0@W9E&$NGq&DCS_@tT?KClJicU=Lec6Bb>f(C~%1qnsG
z@)t`=)r|M`pXlz*Y$!$~jAwf!$(UqF68Jr`6vt6ToR(p4pg0*cJIRiG$5i>>k?bHO
zZ1p+H9DY0#TJTTBhmC{Q_CH&dgR_(qN-{)K_jJyb`fyG}*x8IuJwoCxDFK(yWva>F
z*KPlZOXpNy=mjcLRaHA7#Kf4mQM{-UEK#nxF`cCua<T0v{OyzURS3loysh|cQdgW`
z_th(Z?Dr7sJ#F(6cev}~ng#FGGq`$k*vbX<{Kbjw=zMOKrW1|!U2jj(q~iCq-;PR_
z63vZo5`(Af$JNB>8OO9^t6Lyq?JVI(qgPq6pSqvy=2@a1<!apmsh0X)Y-F7<0(IfX
zg`PQFWu%+E?c8$x{Qk=h4my`#_4&hkt-WoPTW0Q~1Tw<ka@hRVeq-7wM@3{6_(oR_
zRf4fx`vztms}37f(Nx{J8Tubra8_>$e~I{BR{yfRle2=ZSKFe7$1(;qAeWrS`V=MO
zO>Wustx3hM-i3o4Gdpp$0y>QEe)kx4!(&r9qQlzf->=@bEP4usWt%<%i_WdM8dnPm
zj(tB$FJc!ais3;@!}4c3J4cp${`ljnk68Z9%6W$1$c0C@xp}27<8*~ZH<{8&v4Wp9
zKg;X#(m(BII1ZGFZu$U#)1Km11AgcY#$y;c>|NOBhbUxY`}GKuo+bOL`(d{4NFHpD
zg*>QmHrbb`(KkgXo$sc%3rn{8jC&E~zcpU;<z{O4;D|8^W`$qcyb(+nFB6I~a1jD)
zZMrBfDIUM-*f(e1!VOWd|HXwavOKu#eP$&0#$0?%&j{7@nh*n|-+Dj%6nMl6rOZBJ
z9-*v~Om*xl9h$j$R1KtxDAD{7U~*fnR-Wlhy-X&Y61=5C{Q^p)dG3>+mN##OSI1Ka
z`7#izX(1DT-Pm;gW-{SN@-W{3?k!;r+C9JTMk9PGBX7!LtV@}9AZGWG6*02S3R4O;
zPRCVgCY;F8VBG+Ua!$FQuO+^OKIkfNW|pH?tys=@sPPa{SUnQ_rTZj+RY^#-fYBVg
zmQ{m#O1jj##3&}$c%fJ6O+g}7Aw(tI<)tFvFnyZAa6J_H0&HJrh$sUaE6b<it`pS~
z4MXU9q7MAO=EbDWj!e|g11zhe7_LJDx0qJYY^d3wX-EO-B#MwO%(}bx2Yps>V2$80
z(itcaEdYn3*QzxZ7W+0TgsVegHgV}yQDsFKi-{w@!k3BDKR$3B-<l3XW|rlTm2g#_
z{x8`oZZSmaBR&RZI{=00N=mb#rK)DtZ0D?bTqEEmLLp>KXs1ACum*MPGq{TjIp-Nz
zZj>FUzy`W4yOeR<t8m?WTCqP4{B_*zoTtRh><S9v-c>;Iz2XgYSKlknX=2k0f+YpX
z#gAkDmUv2iR}YlnYD~)RK+*eFZ@|}J+0~d52<a)|1H03Zev@lkZ2XjLX&o`y76tJa
z70Dz2f?<ohX6ZsLQl$E&<J+u-c3!Tk?edxre-yyiK_KdC%d^h<<EtGHrE~7ZVQZfY
zMd5r(+Oa}+IGc@pn`~ngTTVu*U2$4}H2GMHFD-}d%$iqsWRp$9niF{c1As14!X+d{
zYCI!o%wXJX&(Mmyj?j(0BhZH3dFXzvYD<014)kWcS8BC#gEpC;y@l6I7vI@OI%0Qp
zX#j&hux@tv&!0EB&hQObA&ON?J`A2Sq)*)lAZB<#?e}#3=-=vefn>Z!CCCJvJK2PP
zGESor#26Vv?}noRt^qrQ<Db(%(s}Al;Ijt&kE9KJm$cb0gr1*uZa79)k+*d#6q9j9
zI}5JHltd#XWFfh$ubEDuQ+S{dT(kNLppGJdfQpWh)t5Ijdx<7Z`fG;bm&EDNGH>(x
zE%9PfO;iIz8hIDI5ap(&PUWK@olBHGy%orZ-gKB3IKiFR3b>Y9*dpskGPb8F;bcCa
zAI3GVf4w3<Z@OfHtG~NZ3EFg6!%5_V*ZD#izy=`qg>};Vh!WZWOSTaVIX*Q2!bf2%
zwpHQDP`{<N7iKi`j&b<`^x4-UwAf-F{v8>_;Tq5i2A0<Wtk=>U-~js^i?NzQW97PV
zl!E!GD9AQXJ3hES<sEMe4%9*|*pWhs8bE{=S2~jN2&0Dg3S0a)o4IxvdJCQ+4DqX|
z3B(%dN8ppqsAA+K((pw(PXWOC^jkRCH;R7;pb_jS;ixB^#S&dXTGO2W38jWD`m45W
zlQdhgsLb2GZfj^9{wxr$NFVZuylp=J8KH>ziPHDMJCgFptpv0hZpey4GP!bVSp+6O
z3co329qvc~Vhcg%fdhOixlap)AEmQj=sd3kAgLfeR5LM@*8X($Xkdi^M3|&i;D_t)
z2aCsG&;ag9U80A;fgVZ!OosEoOSp9|d0#jri+ud8Mt3=t9zsm^lNB^iFKA0^E-u%~
z54zGU+-~qzPwK9iL=tF!MS9Od<~N4qu7q)<Ue0s197~1O^S_!L4Quq(se)l>{%{_i
zL>2mg`@%0Ep>Xb`z_&-J?;sWAVR4)de3xeMya>cT{1)!rzxXda$b8LX7)*I`-a`Jg
z6D~jMn!HG6`<v)rk`MuYQJjdc5dfk|xOYp3OA<f(&-MHMgAX8mxf^LL4T8NSoFb8g
z!f<)yuo2i?X!gkalk+n;{q<JZVKaL`enB9ig@SF(>d6I&R8QG!i4Ze_9Jbj|RM2gH
z^5E4=bk4;}+6c`>8n$md#M@a|@A>Kk`!@por{WL$s5P8BxxAVKFMta%0O_YJudTvK
z5Tj>exzvHfiqWyievS{RMzTdAF0At)x6?c7&g|+SH#_fz@tQ?g%Rul*n`MMcJN%DB
zuc!mea7O@~b7qh^X${e_vG5~1DCw7X7dVa_?nx3CAUXG(!X)b@^hgGi`uJtS?m&r}
zK~Rz3hCb;_;^DqhMN)2N{QMF{R%wSMtS@G^>-06MP&jrTb`oA<I%37A7|bBzAr1}#
zjR^-o2dH8Wd}U#HZoBR`*66EQK|a^bSO4macmnWbzhttvy;;ak(crqYbmSM0tN3wm
ze(7Iha$yn4{$nVB3U<dtZO7^`;23^v3nQ?rX>(<bJYSkefB;z&21RrRIK&m7t46Eo
zaxFL4KP;TMDD3e-Li385My@RE+d9^Tp8VKFQj0lk%}3c~5)<>%*x2nvBG@f+m3)YY
z&4KUDX*(@8`fUyCeCI}f<O38>2uW|Qel(jg*TrbQ%6$rhlm5BaBBkNDX*#OMK<ax(
z(G?5fxeQ1I6IQe@!~NrS5A-(#kgjeB-_3BchdrFW&TY0rBR}%62Q~PH<Uk=ZR2ia-
zEN(nGN!{!gTY@YO-o=C|p?$)RgEiBe2RP(4c;G3i1MFyaA|yTA!RFBInCtqHuGoVi
zIHMeE_L07p$}SKVkQ(-@M|QpnI2Ep&dRHr-DHG389tzTC8$v=5!fa9ji-~wggQ^wd
zve1_tr0$$yB^cRTG5usT>IUNQ19Oi;6;(6eI~5PeT?M8x-;3|{Pd{N<q=G2{9Nw8H
zI2Sby$UE(m8}U2w8WzU0&1izxHqwtsQig!xU%mPe=*Yg%VFP;(s@{$ybJT9kW<`5{
z^&ekD6G|D6dY~Rq-%?=Rq0q(}&d#rpzs>{_(R-8JSV;?M&c%N1YS<#%qhLI8m!9Qs
zj0KyK%E6_cp8G{UX5T8rSimX(bUqJG#X~vZuPDeBkrKzF;-f?vYQ=x4K$sW9o<wbB
z<f;8kB*qf5%jIPq+e{n01h|9UkrHrul2(e4WRy4KQChze@HWaD0)`pNK2Brvfi~1K
z)^GS%`Iu;8B6`rsMsg2L+tPaPclx1?^x;DAtI{_NE1sbZ9PuCP?+da(avsW)BOPVx
zUlH;R53qQvbmaWPIzgnZE=~#66H25mKbc%YU58lE&Kc61eid&P4>l>XT0@2hqSkO-
zf&R0GJs_@Yy!hGop{4)e*)jZ4|C$hL9w&qt^t0h<GTa`OBEOo*!Q?0d&b%&T`;v_I
z-D%kz_c0Kk5>X<Mom~OIXoM0~hg>i7b?1XA>z_ozk5I#CK_U%*mct20PgH)IT(U=Q
zJFP!SRTeIHb%m97NG#F;YLHB)Z8-l>>`O-;mnVFKh(`a|uWFym8toB0fAbAuY)wSI
z=(u5cqVu)ysBO{cxRtScV}o{dZ{?vmK32;@a!8OlCi3Y8Ct`tIs=L{R$eAeN7YLf`
zF2~t<!?Pr1owt)4f<S!^Y43>{dw_h$7jo6D5vM0Za5Gj&@pN&i_s@T4H5;O&k_yf+
z8peY#pC^7FR)Ay~g$aXKTM>0HC44VgvTr*bf+&~EJ@$=whtxItK8^pa&3Px|Zuw}S
z-8UypNEhJEwgaFCuvK=$bgy2o_IjJgX~BUk$VcEHL0b*v3WRs0Sg^i+44~z+Cr)rr
zI_UIV3~u1!94vR>2Is%-LHOOHq5Jdav-8jQ!@fLS6#}2YR+vs!k-wsGr9B$rFr$Wu
znfhVXVF%$slJ-~By%S@YQhHtu;MDmQubfomA3u~I_yIgFUq4DvvER;Sy3g!VPOC-E
zR|-C5(&KSMrIl2yp3;&Qm`Czxr|f~~-oX3XBBXB8Ti*hlL1_q~&x35_#eE1RxH$4%
z0sl^ZFHC=a1j;`b+~taL<hbnBp%z)QZgm;!?zT+8U)v$=x{>!2>i~U_vMMSV4oJ1A
z|9Ck!1Q<NHjOLO&sf@mO6c#iE{xvHOO$AH11W341!i~6qyTW%SvQ6ztLyepk50=+r
z#8qz=g*WRu3Xv&R8IN9}=oLLYY0ROYO2Nd%<g~r(v6vu<m(EVV)bHXa%e;Sbz7JO@
zzm3-vZ9HyV3{0u^#%;~IS@yJVu3emqOkMl$1L7Wt1DU@jE_3{2g4c%5#r-5+*n`8x
zrYNj1JLK0YZu}rb!9^FA>a7gx+AbWyM&iM-p_stuiT8P1cO(vQEW}#8FM|~643kD{
z{^XrYE?Bu$-lG@k+%6UMfiZ<3LLNypcA#bYT;pYnSKe@{X8bi-BGom>1QO`|0?ZNu
z+1J?`Qxsc&jy4+ZI<7j7)=|$9&t)HQg7b&InAx_p{ahz~Alr#vr{9*>1=20lM`O~{
zmZzVHv2JL$7Pj8V9yvV)vabg~Vlf$M(?jZ82i)T1TV_NzqNAa<uNIX->SN~EuZ>CO
z(XZ#kv^H=@x!!mV>yKEnhb)mP#h8~s?(bI~kNNWIMlZ{~Q<f*0_&^NHC<}Xd32ewq
z9wx{phU_2^PY;>edJFa=$A=3c7*37v{!njF+wovj{8usFFzmPoh0#>zv=Zb8oD6lu
zFtjJ`C8T0g1qcU!2>iB!y$I6VVC4LG^I!rC4yXuvOU^@v&K)iU-QRiWIv7qVSO5A9
z+I*6i0O~>ang?n}^7kijpYAL3sDP%)>iRZ(g00JP9}(!6ZU`pWcwqu&qP#@&xeAvO
zY3Cj0zm^v1sd`Hs2^5sNY-R~31vKHmX~7Sfzc>k@asSN;;<a-{Ap$OBtHj;@LvfQS
zEqPdIiu8U<=fV%o#fc`)Q{=a=IH!4bX=iSeXFc1BcJ_XLcEv<+ol>XTQQ=;dvaY(i
zPE?(AVwIg5x>7K8x*VmF5pdI*D0Ev3FflM?@0-LeJES5jjeF-Iw}0Os->-mokp~uN
zTT5Nt)$c=50W%sw0PwzWc{f}JdJ`L1k~Z$fIB%`DLRrw&>smrH=MMR*p!dCCLfXlb
zmEI7#3KTpq&cE1Unt`vA1L0tBjXStVz6rMz8GD&_fNONh612C?DU$jI-Ed*_mAJdu
zDmQ74tNe;>#QuS*N*hg<8-rN6A?4fnBLUeQuTE%yBspFG96<IZn|II4pOQgT25pno
zs*AH-vz>aHTTO3f`eOXU#Pv77bxv|a9_D;SJsrc*{C4$p!K!j9^T2GIRc5NxELKHD
zuUL33pN=eTZL2Zuk8(KLjW(-Hu0^@^XD99Ha(VlZc$-z00ih_2zMTa!)zvN*q}l1{
z9IAHL>A3hJO#t0+&L-Y<reI8JV}mGtuJQZ;sZhjFU@p>TPCLijt3*o4Z`O)F*Tb?=
zOq;Q5lCI&S;n+Giy1&t~QSRfol5CPz37&4;_@H8{^eRa<hWRRUnF$L=jtw6Z%io;u
zHW56mdEH;D*VVF+zd7l3{}DwiPd<{HD;K-qQ!Bb#IRiQ|REexZ3lvC$Hdy_W(P3RQ
z*{_+}kQXGY%JW)RLq0P2QCpu@4PrabJIQV`shb6h?&fjzc=bCbMS(-$=cog~+VHL!
zT*+-6Ox@oV7G2Ayi%dTzRFf}%+Rc{O{!PHC`!+Ju5Ip@C2IQS}{}v?et3#uG*?Ypv
zpfdz#NC12))<&iM@oST=1>q4sVb8m#>w2(&z4~Xwfd0V^i_nQ}t#hl)Brb02zlmA2
zj=WS|DXB0c@m12)nd0Ib;cvqF1~P*oSIqo<*#^bS^BG+lpf~D>>jD`}9u|8Ulx_rk
zc(Q%Cfg0I8wBBz=osBexM@-V<a~OdVtc=p1vv>eI3Ig8rPX*t;!RxYoaX!8<=!WZ<
zD`4x=UUmJVH<{ENC04Jx&M9rg_tj=K)(1_f=5)O-=1jHQy1#8pY(J#x%0+BfoR-%*
ziA=4*+PbZTy{Bft=1U2R5Z`L10V-}F$evJ$Q`%-RX#zF4u2*@cz;s*ZF%Kak*|LrM
z_|^jWtWsw{8=Lv?kUQu^Y16@mq{@-kt{H5iR>e%3VZ)ZsMPU};#3G(pIq*cQ{I+sl
zEdwVis(eYqlXJa994e+f5eIL%_Ocl>24ZplZ4=?-$7tFybyK4t#fBHpMc_>%Nk5ZC
z@IENr5c$K)cE)~5X{}21EGa4)Z;hIQIY$8?g9%X+<P1e_(a@W%vK+Mg#MEG~_-#{i
z26b-0BI|t_&E95Qgl~U(!%%cae(hasKsbBbFr?;H_fTIa+4GBg|EznJhe?>x5A7JT
zSeq5>TEpC>u!o2*X<IMoCPV%9t{M--B}Rm%?VI0eOt;_8vfRbF#3S-2rL5$Pv#vCN
zjBGqN7ana~2KDx!=$W#g#zWIq@7?0wE)7{TcAs>J^=(D#N1pw*Bq(;*PGdA13EFRB
zyX6eWhsajv&^CRy68H3CKh$;cbmKo-sPy%zj0z$6y)s`T#-RhnSn;G`(=P~xsM%Tb
zvBli27-;s)YtZPzli_n&e2trr+e3svP|1X0%lW+{UIIO<1mcoXpl``=%=5E#K<~fA
z%@wNqKc)-n)^2{CzJhrZW;HC#sv3r9xACi)kCasxX1^KIZ_Koa{SxmPKiIuV0$Hgu
zCBJ=(>3oUq+-(m_=SJhm5aLnKFjW1C0?(5l@tO1+a?^2^&T$R0h*13oZJjev@$E}x
zCQtI}r)^!uULfNVX0}6E?cAoM^|@nq={cfP+J(W85nr*K)6uk1M;h+ihSuBamoVPn
z%0VjG0@H@SbSzhsr?s;g--SIXEkj-I<-n_6c$ZA|Hm(g*t<nJ)+?#*7S@LI7ZcyyF
z5%LqK)v6SA3=6Ed2bj6X3le7mw-J?k)@5_zEK`!=#P4+5RL3X1O_pibYYtoFl=X}$
zj=0{MWRPba4c3e#ga*2bQ<Yv5piu_o#`7Aol5{QZoN9fMTvk7OXQe{eIs36#p@L48
zQhDsp*n{4;eqMeYn(rU#rF2)GL;j$??Ji2DaMhB)YhbOGVxd$ZC6pThv><cx*VT%^
zT4=s<`T#dMHV5&Lo-aE>kRGVwM1n7mfwkzxsqEo3vJPBEkv5%pC7YUvMzJ|#%SQ_+
zc+#FLpY&i(rG7C&tKi(1nsuTy#riuh7<FQp#{Z~5cc9sb=C8mzmDXk5Ph604LNDJb
zm1lg$o#0Kg28SiQsaz)m>>l}rM7@Z`)yL2Y?)&+a*R$#{eJ9tzI8PI3>jQ?~K59Kv
zP}&|oMG+>1Dk1YDJ0ym>$Lx*_$&0GlZcBRgNeKJ~V|cU0mNYOxk}AU(l()ATc$qiV
zwc@dLLlPk}3w~J@#4F{rdW|CS*J@zK9mX8~5F#+~$g|uHbqNP7%QJ=b(+kXr&BP(a
zje4j<*@GZ0c<%O)XQ*Lvur(fL5F{9ivStlyT^em0dvpZq32x$N0_!~(Ih=v%jVj6?
z5fTce{t;c~fhbBH4pIp9x$-HOLKGdslt4+a&OjW(0@b0kxWgExXk&g4W`O!!El5JJ
z_ahd*I|XEb)dFB>-Jwmiaq1Tc?*D)*LuL%h*;5tmF}x!^=;MetNdd7$KG}uOYle(R
z$wR%7pMGec3@8&J_E*NDgue_-0A{`Yxh{=AhLC!^mp%#3|J7M}6`B_t?cIUD<{2H5
zye2MrkvXWRAtQ{3@^2`dNyI}8Y^JAvilK2cH%sa+Z2}xfH&Xmciya6zlAjmpkjkKY
zthqWX_@YM)l3kBy0IfmB3>Il}url6b(1Nu<+lNQE>tL({-WWP)h$-Q)%5|>H!wee#
z>L^V4F@8eo&H7t{=b4j-ghz|iN6}trurr<nZM!;Gyt4-a_zX{ZaSO}Eij-0K<MlLS
zO~1(LGk_0*peJB*uMvDHrb^h3h9fc|{$dKXQ~p#)9n26FN;L=%-fZ>$bnc0ejrAS+
zm2iSVwoaS+rJnIVc4~M6yomKiJPK%UX|T9(Bs#(SDY6L+Vx${?2hUN3nCF%IGn&m5
znlaRe6&?-G^fE|2@udak?(J|t1tE>VLd5b=fb`I{C}^Y`N`pR}(G*TIOh~9f1ezPt
zzIZsL#-Of^zmmxcDN#-Ab=T<M+`VA2C%$mXFAr46q~nh1p)rJ~&A72*Q()6w?pLHk
z6C1#kVP*G``UMez5<Pc4)l%IKa>2<;m+*bm;0*r31R)c95FgCofyUL#7U>ziSMQVn
zXo}_TOAaki<{dpek#RH;YLVS6=6<q#O#C6hla~s{?P}Exzb8kd`~<#=@`r-Zgd9wU
z9w^2-!nHsjeE=^Hdd`dPGXCOE-S^|XoIv_f(RvlWA67EBB~M`;;MtiZ9e!osE0Pcl
z6$dSGxUbGV$dS+y{%;Niz36EWA>HKwQ0)77d_Xks%a2sNG3MPs<nNC6jxAI1p<wr$
zCuLyXE@}Cb-e_0&CdmZ#rOQE4>dTZo$;8QXxIhX&@`(VSrwQtQbNz%lqC7I$0I|4M
zVhf@P;uAIHNTR=-xK}&_K4o3tlOQ-~R6bz;tRQ*}5>ztKrYKl64DuHEBjaufJlz--
zA&6pnxOW)jLdQzfPPpQ@q=k1-U_iUUiSF)W3V4I)3u9<o4*D7j{w;@n&LQRIzrSV9
zZ6)6cl?0ab{bJh11^<9^qr5qoocki@5qnf`Bs1xND*xkUN*0YW8{!Wo04x3-QfKrE
zqtgfv`F#t`(K0sVn`N5i=_zi;a5ziY&Un2=^?9F9G_WakFnjm*6oNC_6i(O;lXK=F
z0``Xbyu*AmV<gmepCn4Ni<E{M9@3iqBIc3M6Vjf|@^CBaUGNx~Z%uZ_UGPNtAMJ1a
zvwc4JPkXdZ_CGQqRw(9w#Gmb-Jc#~3c~Ck8sYBB83p8MCy(~wdCdKh#dWhF#Mv`$2
z)aD)0O_YJd9^t_mK|8v@+l3!YhHCST<{R?KrQajlNcp?X``BA_=uX_PqtHzro{Yxy
z-_e)N`-zo8-pxBg-gY;+*g<v<4i1)%ecU5Pp&h-?mHqAggHNxXzP@j79r7WJ@}2?l
zn9IJej;#QFT@dSWklN}6La|#~2iYq-*3Lc5k37@+{zgeDl$mv_l6gBqmCY-$rU6q1
zNJSiagV@WKufebj1Um@R7Aqt<-J)h}nzm<}?7-v$X|wa$j&kv+7x}VTN0Eh?I{jgM
zIo}hsaFPq!8}eMtc_Y1Jei)y}P*B6hxv(!+*9PFL;x2!}Q3)vJN_kytP2<qE2juED
z`Az;%RW<%c^o?=^p6q?-4(eZZKj?;JuD=)G9^KhY39aZNb3T3SPk^n;*|e*wwlywJ
zF|Hp#*V||xEOp+PUB6K#t)5rv?$SIxhhzJh&!ncB8^U6NOWyIey=4=o7}S6lIMbO}
zOb&#_ZzpIh)P&{BfVAwgY)cokJ|Rbu-Wh%wk@5GjaTaAl<*EA^MOl4o$hs(DOHL^$
zAE`1*Th|B075j$TkK1=Moz3F(`i|g&Jm)rP=bR4Q?f6Dfrn}+a<kGQxhGUb<qKIB^
zoJ-^KEWBx3NrdxqTLjD35r1bR__UAEm;^*P7>a+QmWT9P@)gcXvsa$<xoQzu|1L=K
z;PR3G<r|`Rh46!*p$v^_TAm86MMY+n>zQ`<OfoZ^V4${UwVRB`LcD%gh4v{Op}>9B
zGu|cNFZWOF!?oaMkdwr5kd%BdL&iIJbu5s2k5<fr*-%Y7c_<*8L`t%#loXkc4;FB)
z{oUQSKNGHmF&#6m<4@|c>_s-SKJMXGU^ZqKc79Wk7lk%B*PIr6`nmExx)h&`fCn4e
z4^{|!$FB$8N9Oms=RgSoFE-ZlL*Dw@C!TGTaqZOAU;IiAoSlnwrk{yl@j}zmI^7St
zjrE3PXW4!_G2;DE5?J}8>=a#eIHU?tS^@=!xQ*EfmJ@TiFRgZvmg9fTP_5zv88zxZ
zg#=^7B?}@C9wZ<lz2%mQ&@YC~KP{$wc$=3Noul|nQ)3M27*!=2T^+`Wn-Y%y<W(g`
z+ZlYwmPKz!!IH&3W|>8f`dr1KO+t%LkZCe6<&o!ijEP_B#z3NXK0#yK;`#@WTJ^q@
z9jUKZ2uZ4@s?x~HC(@qS0w0~6xcF)wz#AzhSt3&#d~J>Iv*bb>OMoTPN>^q4dBLe(
zY1gwaEZe?D+f`6&+-3A}f#kKE!1o$u4lT37u@IUXZccwYT~H5$DTB`PS2rC>Ij__l
z*VE6?#0Z#tkHDx<Njp-Jj2C*Kd_$2jX$w|`JVF#2HEx2H!||+#KyLS!+T^xkq*gwg
z0rQwX5zKuQ6LW%B&?Y|k!(C8<a}<2F?ThS98sA)V@|oEIXOwxh{GC*ap`UDnqR`M&
zrU_X=k~~rKpIV1_C0UrZ6oo`-cgxEPerjbj@#<aGLHenu-sz%i&kRyP|8-Its}>qr
z55(n@XfEKooM;-C*ROPC*NU#)Ome850L;TRh9kVkieDJl%*2zlm@M=3((g6o7(tWC
zb+c^}z^{%^FFF_`)PY(Y1=Z>EYKzK9LpzcmI%hF(H8evhw0uQl&k7oUeu}h!oDqxe
zGq;&5Iuw#ev^!|@KcoO4Z=bf*noVWoQ2gxfevB`?jWG1rn#zUu1TUh+-{NEH@q*(l
zM5S@X;1Xsg<-l?XBcaq-FlrWty52j_F~4Pa5?TBsq$nj!L}igTzxIX2tu|FpkPu`0
z^k1N}mc?84nB}rXCmq4D*?#5GRM8co=1mYYsgxfxOx59Hv_=7PJ7)_D3lm-P>AqzP
z*0|xdY;EX7%;HV)hP7CWF!J+uB#inM&x!g$GVgTgjib~Ty}#KtOlo+I)h*xI9qOFZ
z-i@m?I6#PvlvUWQUg&IJF;*Yl5+<ZqAoN3NOPp2tAos;YVm&n_&=)<+<DSm8mToOm
z9UoQZS99mr;w=EVW;Ok}kw$8y#-4|tl$-HPYI*ZcYubXxFKm1k{5#-hPA4Mb<pK*a
zY&ud%1x2P*>Od4S16}q9!srf?L24;3?w-$$@e`PL1_>GlL8-%PhQjpEv99NaDy#%X
z{sla%dJf#?%SPb=@D2ISdi&B5S=U1;%EyCSL#MM9B+<Yx&&fL(^J}>&FOR75ZidJi
zn{jD3YzWBbwbT{iJ_mHQ>_zEZmBHxSZhpmwcVSRJxp7H=!uJF#oNkqN_KmKiqmqHw
zTt@$3!4R{{j}1;LzE|EzC^zksU(IpHH=oO|&Ad;MdLN$cNM74wzY@(d#fvW1B0xK)
zXQ<S}>A?Z(%PMxxOiy2+(uG%ff&wKbY?gGNnQw_M?_e7_cGae?+*D*VGJ|&ti1(oP
zNjp~vm}IRXN1=V9vACHO&AaWbGVYHcD#gsmuR0_o91n#ORdb(J<^kf*Z1<feqZ6z=
z&@~SVk~R%q8nmg$LAfVhlt*Sab;OxQ?b)_Edp5w#cb2G@g@({1MA}U!y>?lzY<cRk
zHhHtfwv{3meWKU<lDr&FJi(9-hUyBqGT1!2#RMC7M9$@oi%IAObtn4L5jpeFkh_{&
zCz0HQew4w4AdXRlccv(RQERQl{`ik_D~UD~j~y`-i=73w5MS4U;ytlUkkVbs=4`iT
zDgofPwpRW_bzRbZ;QH;#KBywqox9@&TaP@P)6qJK)6pWSbptH4yit_DO?F$MA(Wg&
zYLr^0>-s4<$PQC#?>e@d)nQFDz;0bNiF)+<5DI$n>CgfiWxoj;h8mqMC=3%FzIkUc
ziVD7Dg5*_f=m)^MBSpK;>RXyR)YPOaa}8*Tv!_7>1<XG@_qjc!zqp*jZG&Bfi}xBo
zKR-||&evN9F<df_T_n+cmxm3cNOq2IxTl3Jc#`rVihk!07}7Xr!*=kJKf9?Ub1Vf3
ziKUD8w{ARbouNitqDH+KG1DcHAD2jqC~eJ)AzXd6Pv47i+eo0~6cv}Bba_{stOc@A
za_1d=X5-e7F5YQi1Bw3i;71NDYMTv)IwGyQtXFq7XV;F6{9qa2_BzMGz*N(bK`WXx
zXXng2BQXBr&c+2b#tDN6#(lyYk%*eNR7j&}eEmDh{;o|YVbaGpXiPjYzqqR>+H;d?
z$@d3~$b)Ejv`!x4jhC|f=qU)z0PNzKg(BDNvJl1aco2LC1@=M_?mG9v!A-J{;lENu
zH9*i!qR!HyM3ctQ8Asv7IreJcGC5;LmN{u!+`g~{KDq`xl82-|_LQOdL$26o_f}0`
z3p4c>J1El*DQ3q)1E4T>u~|p017H#%cu*y~2Jj_gI4Ogp^UwjvGp#TUK)_#=6SFbW
zQV2oVcskz`@{e8n+C2kw33?Yva+1+f<idq7&ZygEq&rdQu9g0yF&G|@(Kx97R`|^T
z%3q=JTd!B?rf<%_ryPE_*!!{AFU-A@xcxN}5;yAO?Kc)meEggQ{u}5EU|eCQ9Ph`C
zhg!Yurpx3hSz*bsxTimfvSPgDP`}mmaun5NUBAI>w@@rh0wN361Q3ZUf_K>eh^$lR
zIBy~f6jV6*{}5SmDCGa6cV__L|EXgBJr~A`?wbk(!$<m9|EsO90E=7O+8(S}ad(FT
z#frPT7k4kk-6hyiC{PN6yIXO$;_eiO;!vOz7#xcI^xW^>^PT(snJ2S%GHb7uN!Ctw
zlJ%~)H4J_h_F7u(MM}4&%IZv)5<5ZvtC_b;^b&El@gKL*&(`<8>g)C^<6HF?JLa$~
zm0!=T{B-F5wf|MTG1mPKoG|#q+1|D;u#%*;YGI&CZ`B^jX2kYRZs=n`WuuS&LcQ^k
z4TPt@WYqT3L<VrK^SAB-@E*41Hm_HgZ>$>~csw*%VmQSDnZ3B4GHWP5+osL1fa2;I
z<1*JVl(q3~0?`#ai@dn15I$qt(@SPA4mur(=S(>vPE1t#h)gxa;l)_ppYd+5TZxNq
znwa9%#9$2M`4kM0_j90YwFo*MOvs^jgVhWCZXV9>P5@DnP7`9)*rls!Zw0&jS0sgP
z2<H3|mzs(Fq1}yK$D<=+zfMHq7nk?#8zyao9v|UMOHV=G>i|1t)95VgO3f;~+sn{%
z8dHzP*4I8k1)9Z-QFd5p2qF*%9f@7$P5t_uxn9oUIyPLhdsMSyn`KgFeHs}M;UG10
zExqFTFF>`Gb8m50BElKuqLak$hucl_DSlIr9f9WNWrdhId#8F~KOFaMZ3aGl<ZHWe
ze}tHJ`v&A584~REYFS#F+~=p!67XGc4rAI&is7a#scw4Ftf6jc&hF5C$*%&MOM#c!
zxb}~yD@nC=R5?Mt0m~_Uj%N5?gRh-x^Nb(Z#(~%i3$4VtknJ9hT<j|l3MAd7BqUA3
z6Lk@RD%hl}jzwOA)ZM3HpOvj!&1;f&CAGTKs-k1-6F1B)v3_b&LQut?H8My)c#%R$
ztYH}~L))w?My2Ujro4PenJeiql|-{S>Np-Dg7`zq_A9x)%z_!8>C*coGaF=UMnhQL
zZh#_7U&Klwh~K8Ea+hl*hA@Fod@|cW)ne^9?8xHBpi0+MbaB_a^)1hCxQ{q_s9kUo
zIQ;#CBS+o)uo?UvIQF0qZ6fFJ2=M4^pYRqwId}x9l48ivsq_dvdC?uCezX+KA%U3Y
zh+mDF1&8!gLgO9udFw*P<MUrOZZ8Tk@d3WNi%HTD*gDFGcik3yu$GwWxTXh>ti@63
z9c!RM;bhj|LZ?b*I2{MyFo%se-|~+4#5rJ@^1G;C0fjkOH*};W#oYK0lE?L*)qTxu
zWT&JtXgm0`F=($o;{aXqPcPr2ifZk1-MGE_WH?_o-jKK~(bz>KH<Dc+)ThNry$Xn*
zx#SgY>Tc<k6LGS5I((j-W~PY-<$_;_g>*$GZ#BcPh(xTJ(IMErPTB5UDSe|nTBx*Z
z2ggk8(KTQ9kwo3rD1XOb^7VP8_Ac5eJ8!CyS<rDUSvj?bw$M&OwbXzu`JL<QD|^MC
z?!b`G(iVSmJ|3M^_yY_)3o-o4Oe>&5!JfzJ*upe6dD7x%>d>;Zh)JdW1<#+w(XrgX
zxTnUJotcnnJ_u2to2Ww1XoeAP!NE5XT(EagkD=8%8kESPAir@4FM-y32_?B<mlC>(
zlB#Gps*#Y^soGQ{@|s=j@(sVd6_zW}YBkO|DNgX{PEswDne6Vh3vL?teFfYzxFl5Y
zpg%fyYdcf`*;*d?JloMyqF}6RS{iO}dHtaqcDCKRq@J~R_2DhOaUSmLHRy@A)QfP{
zcO^S?#zFNA#ZD$z#yY?4SMDbIJz#$jk9l^yijj6HiT|#t)o>#SzlPFT+iK%bJra3S
z-s6k|Np8$j*^brC-(Oml-2-<0yM?1V7xkgbHdDG?BV-S04yBFZPvTD85tF{klUN!1
z8>9s$-SxjAyQrsHr(=>0k@#?hwLMR8^jU=)yKqm{t#EL7mTjdw;Pw|USod)juk2CO
zm2U`3#%hjgB6|Y~#1cD$f@+tEbM;2JLfd1_Sc~gohxkW&=@Tn5<^W=@d|y1CNojS0
zGNmuM{k+Pna4*Ig8JP4PhsZ_VYt=1tFW+xPkgw?=7>^W#B-1K^odZtz2pemv4qH%;
zXP{CI>Qyd&m~w7)dun~`IXx@hA`a`WQnTh6O0w%Rg8kHXHH_qfqiszs84mg);i~$J
z&Mtnk!&b6_y^#SUSHQn4S@RRq-Eo(d?P2v=5qw=HJ;K^PAEWcG&;iK!MNuw$OTtch
z8~S%NOw{6+_ocI4b|57*2gja*sXTOICqj%o2z*D{`Ey7?*{081LE!=%jTp)l(U=e+
ze*EBuj9c^9^Kh|6N~J`~O8A1xrB-%yABo4ubyL4(TOZ(Xf&iFC{0vu@{HAVkccS|x
z^us1d5oIka2_;p!i#27z=@;8<<EdX?KoZGc<OAvNy7_5FAsGz92(cV5CukNvIK{`i
z(BKc4iVB<7{-&6q^DhfV+{d~|u0MVGeq+;KE4)g}YCkJqLALapAG6kP!$f0~#N!es
zNga(suSlkRol@ZJBJ3-{XU!mO4#t<PsKm4{FQ@r8-kTwZQhXUaB}rP3A_)j257(bX
z0X%0vu9V<)P}_p5KIxl9OSUgP#<l-08IRVVHju7}kgX`8Xc)jq!>`LzG*5Cexyw-=
zQ`?PLzPB@HqA!Z``x<GpW8`K|e7lgI5+8wvH2#RBf$j<vn7oIKm2><YJWDwjJRiJZ
z29Mrb+Z&wOr*oH4R_Vy3?b$N5DxV*5sOqsNW3n$UZ@M2uFmZmf(AaW~Qud_wJjZM)
zY85*`*P`|m#q_a2ogk@@Yjq1}rv13&8ED<c8e*90zU}B0H-Fkyh+vqhmsx|l#vl{f
z`ZntqX`U&da>&hhlfUK*MorjM|0vko!*+3dVQ++{D(B=VVJ=&T1?wSWP$MXiyfwL&
zgHOXz)w`uaCR6<oyV|%NzV}H1wWh3dLJ|3th|w(kv~6>@>aH%*9uu**rGs!XyMzAc
z%iV3~KID^4>P)w0>gVXoeoh&zm{kU`{Sq0ck|aKW9ti(DV$OLpdKHH@;RF_Tq|)q#
zIV{`vwDM1Q8AUdqjXH5ggtN;j!QlI~no+6jegBmM$NXwd%Im@wPf^q0HRe%4^2ax;
zXd0GdC<<6Xldl#8t(-~O7<l42Px(?KW7$NOLXqE+4)p;3f-mVJJp|ZjuQI0CM|u1O
zUK$ny2z-PY#E?v<D;j3v3_%CS0(ZhMype~##yiW#gouH-2sLP5^A%!x*jS8*@<kVH
z&lVJ#C)HjL+8$b~ytoMo)EHF(QN2Z=;+a~T`}RRKQi1SbQ!FZT34+@f{4}FY>oh`f
zn!7$J0$1&JA{#cm{==0PSpZ_+(v;LAc1$+{Ftm}xhh^$q-8k2qm|U*S+O6yz3*6=L
zMmB>U(khR_eQTR>U_%Cb8sM#_n+7{71X@IL^C$Or(L+5CNtyg+f6}04PIS}B&vx1q
zk(Z<<v&ajz+so&bj^cMGUGqGqMyFP3m^UC|@4U~PtvOmUM&a6a{CN-03trXbUs*v0
ztjTU&Ox~N@;FdGTNcvOF>U~UZ?GrTJ3MJ8_ISnsD$jEZw4hExsofhk_dH94~02iA^
z5SA*E(jAr^O>y2Ro)K~`i*63x;-|<zn-Ja;@oShR$LrH4L&Y^M>%*)lw(ye4c>go!
zyyy(6dL-s#hXZq(xI?;^m@chM-T0N#Thg(Ih4=ab8qtB1Jr|t$*0+c=dpTQPzi#@N
zS(iKcrm{+MU#wE3SwF_&|5{@gTXZsqU11aqzS`v%JL$I-4737nfImZAJIZn4lII~V
z4Ch~{<G0F=K&C#jzvkTBBo9?;&@Zz=Gwfmbcp~hjmm#$%#;mX4y@|Kj<&EFaKvLUN
zk*TO1b+<o-Y=noP=4qGBH)fkk8+_3+NM<r=s4y_HHTY7YwUX^|+wtUcGM?^tLL0QZ
zr?dt<UT66ox9nxP9S0aE6vvDW59`(YBV-#;7}ga80xr&04<G49RZ!<AGn?FV<F;qS
z_6S>++3#(p4TQUYRIjy*5S}P`xEIBXG{qIi!0dK+T6(=HY+^VRQluHy>{Y<w)^3UM
zujp}%n~G5(6K$IH!Iw(YKpWP~8|mF<@pW%ujMXQ2k27<2QL+|2(5=R#VxH_(Kz*wF
zYPp(g7ATN~%q|utF2y*2rMfSDUlXZ9)eswA&(bflT*i<kEhcGteK6jW@5dQ8>qMTa
z%ub5+O!RQfmt(H=^Zshwq+VW8v*}+O#DIrh2rVp@z3MX%rcxDD9+)AGZ5Y5g!gOeq
zi`@^_&m1qE2s?&3vRTl`O(@M@akeXLbWeNz3m~v*w4{4*%$FKPIN<qhr}#_ea#QZx
z%rcQ3eU$eYoqUljT7Q~{stzq4&!EbFg|9?$w*J<5=8^A9i?`WwV-7E;ek`8#lxoV>
zvSNy*7l`I3i)Ot?m4b;c4a(Q)%BmL4u#Ihv*{)YGKpJPnw_W&R4>66dce%$pdr6V%
zkpujwTbb#Uvm0Q%ckmsNg=G>>S*`U}l06q+_ReiNkjj;NdtHt$EqDozloEoWAT761
zpwXaUBkIzR`y|y~SVyFYsc>jnw!BrkC<XB9X!^AF#Qmr)W=^>1+bWIL*8NSj?DFwO
zp@~D2gkh4VY|uYxUtuzADT`4pFvViyLKOHCyz85_#B;$by}hWRyxJ`n<)lyuz>Txm
zv=pomRwCe15h}%pPU9APk`$&j5kM}?+pw0)jfwTI4i)+=9LVi3^^Izat=J5L1u(`q
zzqyR&mxJyR@z*U!P~!=+%RP60FvasK2$nj<xT}(A%O+5z<+*bHj-3y`_(q|~BLjRH
zan<t69M<9dlyW*>6Hhb_c1dDMb<u6&P$6My3!UTH9@=i0lRceIl8ZX<ZLCV0S7|F2
zF4!;3`#@ebH<GWxmZlM^6MGQZ=*afkP8gx4?((Xr<k%rho7^EaFOh8HXkdECchy>N
zBtuP0U_>zK%iAq3dyekNhIj2TMVCOT@XEHsoFpy?V*K)W4*yETyEX1)aqu<d>eUwg
zQoEs`GEvS;?f$flPE32UFDgY6HA!lD{WWaSS?tMZ%vhhg?y!253%?H_FFQ*UQ#&$Y
zANJ8_mNbN*hHW4D=5S-jxV!hkjqA>Jh09UT5lPrDetyXXj`yV`wn1@sFYW<O7b73j
zmc_6;v{$igjVWwrAi+xeXel<O$t5e(4w@<Q215f|<}GO1JX7<Ka${LN#K?+o!|X-s
zhi?wBc4!Cv98eb`ZHUn5?TY1=RAWEqeiPZf?88}lOhm8eDos*j+TkgyPjgRFq`#E<
zx%hq7LDMekSD;lExQ3Q)O6do{twV(yIUP@5W3IkxF)*Wq?tXUS`*p?PCKDbvjE;Ir
z2}A!O<eljB>ZUFX199S#n`-#7V4m+c+hDHfuB0oqNvVF$)d5@wn|bFSWju%u;_E)Z
z93N-3gVnfKBb~%By<`5o-go=}$pSquV*M$HjFQ{BaXTH}r2XntJhKDf3SFvB7v(&y
zOnEp>=Qa5lS4}Zp8Io0zjWwUe+zjzR-fDz@#Q{^HysF26@7VDRKc-_b0~~b8GAb3%
zu1IQu`qavp+WwE-1D;G$Aw<gx96|*%Dl+iV=81_N(aidZxeXKnXlNS5Bwy@tCK?|#
z%_TU=wx>Hig;V=f!nsZW0Rv+&9R^-FW1cJ7)}?%!;MWlCf-<+VR~}xLKjN0K9W9a)
z8Z6N9><1}?#b>b4vRN79H`P9@sTqX|d(>)JQ_N92S~NB)AP1BsN<^xaqm@!C;>=kF
z_IZ5k(J|84Ee(d*<7=#*xnNu#J29VTid{b$v>0nVDqW6_W|oryFn`b|DWzuDV7e8)
zee=cG_EN^Ys10?^S{p|puW}SVQ~By8j~_>qj|x|=3RyLAUjC<E1q13??CwUYy`%&c
z9Btt@Ch7DHzOrF@0x7w;r{tTTC2V9#4-n=8zl~j@76>~h9Pt0X5NJ`JsQyuoudvo!
zb0<`;J^!N{du~d{0l4CEC5bAC&mFTEr^*L=^tq4}%U`B&0n4whW;vGn<8yxv*ti4=
zp8dAbFXHU7{h?Zvo5aV#)Mlu-!jK8?%Mn}L^ESVSh|OkxxY*A6l5zeNAH_aN`;}0d
zyQ(CZ`z=EBJME6IlfgfY>7}VRHMiV{;jPaj-bS^sj=O&R)dIY04M;vXd+qV2rZ?3g
zIYo2ICRM}wqR~FJ2+EKqE@0Wa8^R6szArSYZC^Og5powuN-@e*+iHGa=(b@KLmOWj
zouqaaZK3u2pkjDAI^O!~0R1boo*yk8M4&7OUw#x%zoot~!}RmlFWR;Yq&7NMJxH^a
zlVzo`4i;3TdW8U*usWTEV}sV<Mxg9>+fR;VcNG%?OBY*#0S=8!d<~_qh2v`LA1K{p
zcp6kaVrvB(PFe-+Otb2Ip94OdR+oyw6Z2)Uwp2(-8D-+>4<^Tx(c4u=Zg0N|JbJC9
z$NXq`nHgiw66a|$1QFTS^ex+?6fMf>;$Jq^2oC$C@oo&jInz+2&CQu<GCq1QoOdE{
zhq-?d<I(8lj(AuK3>|bfPd+MNaf}_TlPl0slY~aei`DroNy9GR&FA!s9K55QZ9@~D
zsXM7LsPkp@Byo6`l9Q#aSLDuf;%dgTTyAz}-hYrColy)~!#Xd!u-t*k$p1mOm&SuV
zQ9R+Z=1>lJP2A6$F}YFYkP|#)(J_`?&B(e(&zWZ)Ohjx^*Y!I{WwK;-bpHDJ^UN!y
z+{YvGaO~$5=9LTeROH82Vm`5^q+nA&HH@MVDt~8NWctuk)1Hn=d#@`SioxFKfmBIz
zB?FgwNn`CM3xS{g4e|%R)5n1cy~ZEvN=Ij-eu~5buHhQa*B%KtiOG1~pOMXm5;wiA
zXIlqQKJe@}J02A&@Dba5DtNOw=s=rEH7b<*UQ<p!_r04nmnB`9T1aIDR)~NNf>7>+
zW(IL<OoCgSDWbYwsnT?`V4UTMj^074h4GN5LX-Q~@8OO^Wu&!C=7M8IvSYaYDQ0I#
zGuK<hK;h^qj%+*4JOD4b=$h`!jflMVK8yMJ1>$vKxCdT>$CD&Si$S_IGyV4}J{F~7
z{>GKW^S+tn-MkxNM*2|GD%nGYw-%5_!x~lk$yD=C{<E&=5#k5+0UbD`ci%G|E$Rl2
za&=Z5;<c@;8lh6k!v|jw1vSCT`)iDIgNo!$fJ?Aj8UI<)SG8u`8XlJDWFjFO0>*l>
zYRm~a_iQ;v;^dxIA2Nm#{BHWTp%z^6%MValx1#jc#`o=vVQOAA-Ztigl%m9G1KqZU
z)LMcQK1<q6?-{L<ycNv$cpCY1jt1xPj9BN2tm|tUU9`EE+9*qY3s+RC4%X;QVSoDq
z2XxD_x>X+d@+Ua_4ER2N6<1DRRl0RJts;C@Mt#X^;P)yZAVMkGBiOq|lTRQ`msj9w
zRGUGOH(uRrjJaU>6Md%A9?4-i4F4M#it8nsnICO$TSFDDQB$jA#zx6d>b3pc>WmXF
z&C3W79&$gS3(s%ab=~C3-29%KU|v}hk_6h_TL+fn4uGYsA}MSu=r6x;yG*a?#{A*=
zMz+V96Z^(@Hd9d@KUD+E;f}w$pRbm)G3T2k<88fx#6P2r=T)@HFR_n&ZHzA_iTI8T
zLadtjPikWeWUXI^e9jpIaf|%?lESvy{Kg>pNRRXWq#}tYXr;M(DaUhUG9DB#tpzMc
zC;pHxYntEb60AkfUizR?uq8#|<^N-p+R%7VIeFOro&7_m-gRNcn#Yyld}^D2rv1D}
z;G_gYu&(4nMT}KH845G6ErYdH-LEJw!G-jBxxL>$@4CYy*LT{*Flz%ScBy3AmzHW9
z4_HW;_q58Y9EV%^56?5o+GX(<m1=<eCa;3l+^z~ni{#i{E`vZHN7`KsPpPy8Sr0D_
zYuaV(uoJb+Js!t%?%x-)C`rApzR@e=&%jT%aWtG>S3UAkYlN|W6g^SxzK^NtiPewx
zpe)5j7kf&Qz87^=-sB_UHJjxLF{U(2uXu<%qs=Uq_@LX$kKRAba8i;JYg_?O*wI`L
zhT1K13tp3q!Q`Dws7s6RebINQBl+0g;hEo!_l78TU~|w@b<PJ03?Y8DjA<8Lks;9^
zXR2b}&{T;Vu%U6#08Wt34r+`6-W#U)BdI{hsH_AU+Yk$n=QHAMy$ITdj4{gT!Vfh<
z=}d+Ku0Gf|@EaeHRL3|^rGO0rWYn2Jlg%foW~NwG#7fYk!e*!X4hAD=E!kRS0#WEA
z^ugzz@<ze5D&`ta6*j93I)hA64Yk}LLJ`%7Gm*V%5PDRFYIPMa-8`c_yX^3MVo*9&
z4zgcWx3dhM_6grWR(i|)ZYpLQ`LcDo=<Y2Ad%(9vtS}qANTLao07ghQtk4+3y^F{b
zuQM9-fOV;`6*=4TvI?PNm2TxX7Wk<~k^DqD**@wTNtI>eJXjTef*D?<BdCfgEHjt^
zccc-@bidQ%{H~UPG3HRpHR+q1eC|e%IM^O%m_5!8#T&;x<HebNj||K+z(oLx-ct%2
z$N_!1aff2{v{nTLMAd$aN_l?)TgS(s>^j^9_W-)DR_^V<Pug*!t=Y|bcf&jQJ)uVa
zWvEwhYcxC^p+nQGA~2R@<4Uxhs+KKtZumplA9uDtE?Of|?7czre*<L>BWS*{h1Lz9
ze6)NFqZVJJ1q($&8}PH>gkqtd1f3*3D!EhAH`=J#3M2}^CQ0I>cu#Ee&1Cl^!df8|
z<KxJ^9F+;dFfZ;IOPs$B)w)K}Wz3?tZOukoRFu`VCG~I=*l|?qhk4=3xcR7Hloi`o
zSz3%FU8=&6TV1;NMOs=81yjgrsf0k;yFh=$MO^=dYJLCgp`tB6;`E7k)v3Q|wFlqT
zK$h%zK4gXf>#A06r^2!0)~HY~DuX^1i>SEcr0SV*%Ry*+g3Gy2>TUaXf015)kr{vE
zZtYs%4J~_a1B=Hzn9TkOo@|n_y$0__$aXaOHhQ2n4DjNU2*&wc0j)nG#cqh;HT1R%
z@*A%6WQ*cYGH6P}so1vPmr!t5C$&&X1P1HR%_tx!04~U=zrC6ei{!?vXV)<>gyaxT
z=#X!NjLzR0*&G$hhSWKg-ML7ij@xWc37tfQ7QZlkc*O*&W4jqLWFqjrse?l}7nS|q
z^c}p;FVczCq9d=OUb*B#WF;DRFNXh{qgz#}nKb)MTw>-Rl-7}+(rJ8mZ~OM8zczLt
zPcqOMy2p9i^2ynUJo|;0o_KI3^g>7k&YKRZiu*{pDedEr;4BPH4`UnMZi1<TeH86^
zI2+&9Lo;M2T+E!5rJ62>As5hn(@h8_7t}o#$zmVfv+J_!nt6e$k;$Pu?@=T-&nUbX
zxkC*$lx$|r&PMUahbGr1wMben3ro$r!T_65=eHFblP}O2?_0ts#$>^806Y)UQ(uo=
zFdH#-)x#x9r}DwmC{>SPVDV>w(ER8WdwyubOuX?ESQp3c9gC86MR^RkA`>^bj@au=
zz2zONZQmXREfTi`dc6qZO>?eUBn8*(*6K!R*Id{g9@KopqYDOHMdhy#V}eWy3pf%p
zX=kAj5eyed7~5`-cPV#yMZ}zF&Nds+o>0{eJ*Ry*#u@g*tGULhe7ym)fxm3qOUqV5
zK|d4IyQ8(He6tJt$Wo2&Ulk(??}!j+y!oVX1BNw@m79BU&%a5X9NBAmvgvC1g^2Z>
z0b3@E2$>YdxExxHk-W=`xwbD+2877Mnus+|_di7pHIUy-Y?|lk3>8bC5m7t2ofcCv
zDiEpCt}~p86Feagd42|PhE*ekOfRFSWpIX{UYXw|!me*^6%M(%xDLHdy?)i+q>27u
zjU70*B^abpSleBxcJmwn=7*$%e`eG50{WDIGD7|;F?3QqPDqofZ54<57Ql@=N4fQn
zP*#({8n#a0Fu@(x!95f1A&|g!)rx;U`O$&>8E$a3q6^R7;(q#k8%VSB*dKPnvh$(K
z3pRQAMtuWqCm2OE7HT{s>3Ed)yZ=WvVkg#3IabJ-HO_5<w5J9i=J2~h>nQ4$M5y$G
zl0Ug~aZDB5nv{*w<_YC}5-^wjc70N2NUJLy<>?A(iCaYH24MXCk`(z~S2PgGn{%!q
zEPMaMZ-P4%?~Wce+WYsS<YIF!RMI}_0NK5?=E$dnfIRihN%h}VH1|5FCQ3w>1%`9q
zOQ6rW*Y>XAz<^xb6Y1uv^!;F0^3=WT$l9Y5_$eM5O#KKC#orju2ZFL+Z%u+s6tMgg
z#!Mu=b+FiE=PrdeGV@>ibiM$HGUad|@50s_oiVgr@^{LPjKBbpAhgbg_sA^$SUuhK
z;p45@#%S=~rzBpe5ZFZQ=F<ETABv~>11qpC1pxWeK^0M+1U3ghj%Rw}8Er0}iNFHf
zx9L4kQP3v*j&M(4i?D^Mf}G|&nB&Z!;MIK|NA3Lvq0e4160pnD`_RypKKAG+LaKG5
zy-{JY(;N`hvtD-58RIOCoQ?J<pise+1>-MVhy#AAh#?7TQfGN1gM^;_DUN0^nfewl
zPEqkj;o>{`8=zdQbs)shA@VDXWu)8{1#_$q>87ikgjFXHYOCH;cbD$XuqwZp0=F*5
zqjkd_wyph?9@I84!tmh;rPHFk{oOV5J={kTRh+fOY$w=`R21Q!z3V-LU?-}&SNA$$
zf_LYgFzX71H+)g+Fzr#tg9F1NBAS~YVjy?P*lX6CEBk@=8UEN?D4qTpLY??vC#@Y5
zxtk0aAp{DbHS>q4$|wFiyhm}3b(nkDja5yI7=~=}t|rh)?sk`VYPMptS<pegms$YV
zTD%tmG!xeiPejf;8GJ(hFfhvSH4_Dx@%HKXqd@wxZhKew{D*fpzaPD>)_u!2{s7^q
zt+up=^x_M`j_n(FcDQxqBumEmVV-K%jESR}GvJDRo3+4^bKtwO2p^R{0c^-Ym7h&E
zHvWwta%bY`lQb{l&AprrAh#(>EyNaE)kl0plYIueuy?A}bsewjV-*O$$$WN0zHzsI
z@hfpxhxy*H2Xf}ajm~>LbDCH!N@2~r1OL$gr8C%BT7&{UJ1}S~WxNX1nXMoq-N}si
z2rv~safdDF-ykONBJ=KU=aC*F-zW5lR$M$)=;+QgiPw@s{36PD;{@}^^iN`R*B$*Q
zdKDZ;%C|oV53K&0{KeLsowIL%-;wTM(dK)D5ll{Z0rJCbsuyERKz?gN{gdNL<27wb
zvh+{9(giaKzeV9@P)<~cYfiu~5z?pOAHXhkQ__@nmf|Hlna8NI7JE;AJ?Wd4;Ei*7
zA+bT-kO|sj$nXh{f9eByBG(k;&8?f>@t55msc@D0*Ipg2#SKK+`kpYJ1K?lOo*ZrP
zO*#fatYE9oLDo<QMO}=*&J3gO%70<nZ;tdE@s29rEu>B^SlCJ~de*nYeDwsXGgI+=
zng<t0n|4kWH|1m1D+JZ24$2}ozuu=y7%pu+;#xkTJSQy@ft`%iXr(vIvNIN>$eWV)
z_O0}Zxra~ufYKQP?JzVB@6IaFl)QN~oY54`*Fiu50>C+AJk&MTqLh;H*AY9c)K3|s
zqV(29k~wl~g%JTNwAng+9n?U46_YRq&eh&&OIK%(uJyKFmns~P3k8d84Ex-Rc1@~n
zUFrG5LD>!}l6n@fAE$aIVJ90;_nre=s@~pc_w@WcqANGJKX<QE_6{IFfq%r?)dvF_
zL{B(9is!exx=<51K*rlg#JI*(_N{|D<9R3(f&~l1*_wYKD8{4!0>sHRKEIF>p@=Ry
z#Xe$;`(E-H5lPf;Gx^{`?xw;2(NFFX4JwH8M83&(=7S7nb}vt*MWPUqr<aW=@QQE$
z4Uwv~jsK<DK+T^h*Y0T(zNll<NWE0{*(eMS!Jimvwz16sw{F-)0rP0$yTYlrKZfVZ
zznO_3gou3G<PJm40|aIa^&;GBa0SK)l`A-YH>RsvJPIRdy-D3LDUN#E!|XhYaWB{k
z(YQ%2>pRN%>=dyhh;myRBNz;*c4)A0k?W$>j)Dn1ofSTZO1r*_BVjp1E8CEA^=^3W
z3BQR#Q1v1ZVtlGK%KST}=P87Jt??IPAnMIY9&{4-47F#W1yDNts(kWvpQm`oDA0O?
z`Pm&KpwNWeC%xx{_8x=aX<7QDBd+A3c0sfp8XNJ9kNb=$eTEFaPn8p?_&m9^h}y7L
zxPo7EDjVoyB!WNOdAI9v`#~<SEaD*=8pRq&7_&}e2cG~X`+VDmoNYAJHuV(nlj5^-
zr|rjILj;&>dcY6`h(6U1>4a?uG5ASpH>~F=0oo`LNO&Xpk&$N&=HA+upa^<`8CZ*n
z*hv0J;2Tp4UB_OtH7dMMWX-hQG35UMUBQ3CGV+)m%5B!zY_qlWNnVIR?9%_j*NoNQ
zrF`=}nk!I|J`z~A_})uy{lQsLUay(QALC;ss_$4=gY~B=u)SUEZRcD2_~!92o?1~;
zn_$+OW-?vfk*{HnloyN@S$z)4zmF*E)2Bp-qmcPqg&15qsq2%4Bj)H(6~DK|k4@Tw
zmjGo=boK1e5q=KI!sr0u$0x);?|ZCXY&aJYfk2JYe}y!0g82V{atX{l+?}jl%`DlR
zU7g<9*ju@>I{U~-aQ}(df96;r0Q$AM9aq?2e%)gZ%fRm9Y%TaoI21&lw*3PBD;_v!
zNa>4K9IX6We89sieNV%M(H|aNDm8MaasFEt1^8>6h~HY{Pnc*zYl~9Y-}RB}cKbVf
zV;&E0Z;n#=f&;}4Y0`|WnsN1=!2YM-iP2^8plkE`WUFDzG224A4B7mX2LNv}hv3cq
z#xE5cXa04yF#$YmNJ6zmMkw+IU$)OwTgau06y7fg&Mr~O!gNN!4VMVh+2<79niT%n
zbxT^Ac#c3woJsC5dF#UkrUu1}j!9}w89p}@`Vy%nl-!|xO3jJSCrSwM_t#36VW_gh
zy$2wKHtyoOgY+q_1^kT9n1D&Lz0{Vq-G+$FJQ>iZ&zv8PqVWC_Ze(eZx3Z0byif|7
zQ#aAsCFnsu5fjt?(zp`xs=!H|U}6zww2WkdoY-O%*yD~z4NaFCDc0PXjDYySO$W`6
z4OxF?Q!|$qi7uj7R68ekC#_4M^=TEZ<_fXRb_zRl_DT?g%}{pY05YSpy(aW@st+*V
zIEewA!*X`!8@6j?^FuO%<t}g51k2SK>mzHmeZLyj=Mi`dW>vhasnBk%csISMaJ}Fb
z-gIn)07tAGgU&;4ruJ*suH6Vsuo+-aDbqa<e-V`DU(hJHM>|G`&-YdL1*C`F?jq7*
zyJ=o+OI<>w0;}?=&Y|9EK;f<^#V`x#7+Z8VU?fX8y)AkpeiU5rIczFXy^VtP(}2F|
z(9YJ+o0)!flH{jAhuLE3Hm)QmU$Rd2iIn>^rP7pIqRK~#O(<<(|4|T3<Y#I8;=8#w
z2i+=xe}To1R2iuZp6P9>k^b^bVUWNd&!qbQc_yo+mAQwt$zKdMJpbdPgg~OUyxaEy
zJo1Sigm^2vd^$ZvJ6b)-$c$Wk#R*#d_vy9!_);H;lLZQ^)d-a^f(hYYh=tA?zAd$P
zo1nTyC>^2G;@?fMbDik7UiXJ~Yh88U{pL=hVkh+#j{`sFJ^-AKs!rRe1+`z{9#>SB
zDzY4%DjarPl{7S-K|LxOE`X6xt12}MHG+0zPg1s!;@&o?nP!^iDw}8Y&}vK37T%I;
zUUIBq#oc%32lN){utSn}t49czStp(>>31G1+P14x9AUcx$XUHjb{}=jtvPiig=E)v
z&QDcW*O73m#uB1eGq$uA<HkZJrS}D=U-YM2s^-|H9Md~d!hczCBLTu!He*;u3346X
zteM}}Uk35oe_7qX1Rn@2{M4!BfmMWpvmDzioS->ah2N$mHyv+$i#f)M7p{vI4{uR~
z8>HUm`1E8Gr|n;RLtgVg=zjlp#GVMQB56lsh<km=KhA3Xwx+euMf&ah)bz{M#zLQ*
z;cwkoo@3R$G+I%pIo<%u7h2-}mBHEQIsUUQ1pK9M0g6s58M-hod?!$Jn`?PR`nw@$
zMV1%_%t8aHind>^S?fbV1FMd~$ENJekwjq|zjsKuCq&xgi~HhT0dZ*x#Y4xrhtB<i
zUB<%*i~0c$4UuYw*o((JOO0WXTQUKQspK&!tur+&)}hX_^sqqeWbF!nj%)^wMuhQ{
zJr_?9cH?1q8>0c)C0BrfIA-27uajYCZHf0}P0Ol7#NI7$Qi(w@zSL##UYh-i_&lkN
z1g>+tq+i2h8r~2zZ)8m|55R3iBf*OVq1Tvv0uAlSW>Q9r8yQ4tYwYXWhvmDZkPU6P
z3T`-ihOH92TmcqjM<5C-FINjE{?wXe*+zNT%}E>|%XGos&<2mOh+i#Aeo2|+g|VB)
zDZ}d@uZ6zE5XZ>I9BdN$G4q0Am06XJaTa1*o5$r%TXR>b_|y+x70DaFWS1j*BV=v4
zvV$I%nm^;wFF1kkHNCp%Ccj*1M^lhQK^=<Y_wsszzYgFbUD$&;8xT8;kgI($O(s=7
z+J*E6G=<v2!JjN_m+Gpy?I|<UU&k`8D$jiswv}m_92hjPyU4KV?0i{DdNR6K?3b;Y
zk^K&%E}u3k(k~ZMWy=d+dTQB0$u<1-{VsBZTfnE+-zP>=$~k*)6I2`Y1DH1oqKG24
z;}UWa0|tQzUouRypVVLZ-LVBFJLTO23SL$9XgcE3moSqNWw@^lL?H3=xl9)*r(YYr
zmMRL~ymAJEKULe8L*XglOT4X@s%jHu>q0C01wCr1X~n}~PQtI`Hn1gp;m~B+>>xNz
zBzqRWE4RbNGw|P()Ly;}WG3)&MK>HgrdfNBnw$r8)77(vIi|$b)#FTw*Fa!CHNlMt
z58}TO$@4|(t6_=cgjs#*V0P&|Gh}LIxaZbaPdiz+Wn`*Iqk-qq%Cj@wJm@xFA5zP@
zQbgC=CvBF}j<cLnLa-Exm0|W`5$1$osXMw`f7XjgEaoeJtqY?Eo-&dnIxUs6nnj>2
zUEUpFm}G(?%zxgzl;NJdgVCREajD*uIM5q@42>yua@jgWH*fE|+GEf?Wv#pOA(#L1
z@@<so&&J71710AS>gnrWWI|(m@7P-VJWvNMl~sPyn_LAfRB3fyi@~lD@rqqC9Y=r-
zxx4$2Fb(2Gq#?g#UXy0V3kgSCn}=0-dtAu?=$<;$AB=je^A!a^IC1GhIV3oOKc|KQ
z+>ge8Qug`@RI2qGSndrPE+Ef&d|UobZ92mI>y&WjE9>fM!;4#mF{6d{Eh91#xySU^
zTZ;W{1MjZcrN30xNm?YRwbvi^T6!gk@?*gVhGvp(vlBqaNu_B|+s7t;J<d}g%by0)
z1MTl(@S`)^6c6<VRyM*t-!GB*S0JsRY;5l}ToM!5)r+CRoe)f;0zXH>7`fd;@NFWK
zY{2*ha|OSvwz}G+NiG)PF1I6tfA1l?S<e6;Pm34@l&wo7uBRU^h}toxtPk8SD21gk
zW@s3vI$_+?E2r1Q9^l1J2*y|36CYi2fIt+RqJ&^!laQaxm#^kUd!JyX8o02Av$p|$
zyS6uZ{xqG};`3J&KC6g&t4%(u_C(d%>JN%8Ie{>@f&3PJx!A=42@~J;4mf->vU1p_
zAQ!kllW6a}X#Hd>lu9@k@97@*n23T9@y^j%zTANx@!Xx>d7RL06dw?2#!uDN0}7Ly
z3(uKiJ0h;7_4D1lP4be}FMaOyb387ry)Fq3@O%@E^B1PU$@^%&kdBP-Ytb+-9W=$C
zqKa^rcBv<p^kJRKX<l<AbPWi*ylRg+BMau+PE=1OBS*TdlpkUX?2|NMd|=NmG>xm2
zcCWKjNBG*96UFrc^vO%43E8X@)zdTsb+xUT;GRRCbVjlVT#3${3Jpu8ToKMrWG?{1
zrKjT8H*GW{Gc!cHTkgg!uXnHM=ax!?1(0v6h%7c)(0;DN{23v};pPZ)Ka;$fV*W)l
z_XfoL=Q{kHwgn0OAA>`Ne}OrX`7G?s+}tQl-db5WxmwD2I9j;dI5`TSP*Pf$IlFn-
zTTu#7O8%>c@<)HFIh8r{Gou{yA7Z*^4!QrRC;VUP|MwOC--aMT9L!3JAPaXVSBv+o
zwl?mpCeF{CfA&@=<{mcomL@ii?)Elr?kFyBuZ&(<xxZBeDS|$@!2eY|*T60aA6Wlv
zK+Wk_VcX{ou>ETT2tZhnF$ZE;G*I2SLo5s{2PZ#22Rj!h2QLQ)H|Lu-Je=%Y?A-j^
zyl;4TczAg^xH#F_dH&+{^Vv5phg3TeLeQOwAzMzk5HT2(xG4mPqB9d*{4-<TGf^R#
znj*~ekOl-oLi+O!0{x*2lzT4!%oLFK&NO`gASwKB@jujo2+yK_X6onp?`aXz;mrO1
zZ%RNAi2Uy#pKDsrX8)TJ@t<6S|846(gn`ek$^M~h4<3Ty!bbWx`yU8|{SN^H1c<&1
zE!p1;79fz&KLlXVK>vfB_`?)PNcMMPAP`9AACiT{|639XB1(V)QF3L1Tj2h){*N2t
zK`Nh@#d!X;EX+XxA$DVe1M&X59SUU34HM$$hDr7}4jBXz{)ZJ=;r}ho1X*+^hKrW}
zvyO%3U;733`H=nVnEed`0fBh_p+!>hPYd2>`<P^Z$I*d6BL9#w(EhhAy8om5@4K;o
zxy0WMz0Lm93ew?DO!oKa7zE<{TkN^k==VpGQvClnQ2y`#@^|0<?>+J*@GmV`|3?c%
z)D;i%$peY(KmGRE=9_<P0XY1xwtr3$eE+Kt|F?tvr{6s{{-=8ge)x|9q<<^;AG_3W
z0ui9+4oV0a^<aWch=5#s2*YAWLU=ss;Q#7?CZ71PNs$mQPXUGJ+uFaTf0rZw)9Ls=
l{%gIO;*00=8VCdn`PVYcbD^e)@Q2<KMD+amBmXbi{|84g>b3v?


From 610aa61c2e14ec029fbefd9cd7e95b276c657783 Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Wed, 30 Oct 2024 18:21:26 +0100
Subject: [PATCH 13/15] Better device check

---
 src/KOKKOS/pair_metatensor_kokkos.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp
index c51d90de88e..3fd275f9a5f 100644
--- a/src/KOKKOS/pair_metatensor_kokkos.cpp
+++ b/src/KOKKOS/pair_metatensor_kokkos.cpp
@@ -340,14 +340,16 @@ void PairMetatensorKokkos<DeviceType>::settings(int argc, char ** argv) {
     mts_data->model->to(mts_data->device);
 
     // Handle potential mismatch between Kokkos and model devices
-    if (std::is_same<DeviceType, Kokkos::Cuda>::value) {
+    if (std::is_same_v<DeviceType, Kokkos::Cuda>) {
         if (!mts_data->device.is_cuda()) {
             throw std::runtime_error("Kokkos is running on a GPU, but the model is not on a GPU");
         }
-    } else {
+    } else if (std::is_same_v<DeviceType, Kokkos::HostSpace>) {
         if (!mts_data->device.is_cpu()) {
             throw std::runtime_error("Kokkos is running on CPU, but the model is not on CPU");
         }
+    } else {
+        throw std::runtime_error("This kokkos device is not supported by the metatensor kokkos pair style. Only CUDA and CPU are supported.");
     }
 
     auto message = "Running simulation on " + mts_data->device.str() + " device with " + mts_data->capabilities->dtype() + " data";

From 82aec56073ef624c6f79ceee30018267d2505376 Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Wed, 30 Oct 2024 18:34:38 +0100
Subject: [PATCH 14/15] Remove readme, add basic installation line to docs,
 make example consistent with non-kokkos example

---
 doc/src/Build_extras.rst                          | 11 +++++++++++
 examples/PACKAGES/metatensor/in.kokkos.metatensor | 10 +++++-----
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst
index 5b39dde101d..f3f1a1937f6 100644
--- a/doc/src/Build_extras.rst
+++ b/doc/src/Build_extras.rst
@@ -1049,6 +1049,17 @@ https://pytorch.org/get-started/locally/.
          make yes-metatensor
          make <machine>
 
+   .. tab:: Metatensor and Kokkos
+
+      The metatensor-kokkos interface can be compiled with CMake as follows:
+
+      .. code-block:: bash
+
+         cmake ../cmake/ -DPKG_KOKKOS=ON -DKokkos_ENABLE_CUDA=ON -DPKG_ML-METATENSOR=ON -DCMAKE_PREFIX_PATH=/.../libtorch/share/cmake/
+         
+      where ``/.../libtorch/`` is the path to a libtorch C++11 ABI distribution (which can be downloaded from https://pytorch.org/get-started/locally/).
+      The OpenMP version (as opposed to the CUDA version) can be enabled with ``-DKokkos_ENABLE_OPENMP=ON`` instead of ``-DKokkos_ENABLE_CUDA=ON``.
+
 ----------
 
 .. _opt:
diff --git a/examples/PACKAGES/metatensor/in.kokkos.metatensor b/examples/PACKAGES/metatensor/in.kokkos.metatensor
index 39a1cf644b0..1a4b9b59494 100644
--- a/examples/PACKAGES/metatensor/in.kokkos.metatensor
+++ b/examples/PACKAGES/metatensor/in.kokkos.metatensor
@@ -3,7 +3,7 @@ boundary p p p
 
 atom_style atomic/kk
 lattice fcc 3.6
-region box block 0 8 0 8 0 8
+region box block 0 2 0 2 0 2
 create_box 1 box
 create_atoms 1 box
 
@@ -18,11 +18,11 @@ pair_style metatensor/kk nickel-lj.pt device cuda check_consistency off
 pair_coeff * * 28
 
 timestep 0.001
-fix 1 all nve
+fix 1 all npt temp 123 123 $(100 * dt) iso 0 0 $(1000 * dt) drag 1.0
 
-thermo 100
-thermo_style custom step temp pe etotal press vol cpu
+thermo 10
+thermo_style custom step temp pe etotal press vol
 
 # dump 1 all atom 10 dump.metatensor
 
-run 1000
+run 100

From 033082e7941b83c9ba148f5aecdf21d19510d756 Mon Sep 17 00:00:00 2001
From: frostedoyster <bigi.f@libero.it>
Date: Wed, 30 Oct 2024 18:53:14 +0100
Subject: [PATCH 15/15] Check equality of int types

---
 src/KOKKOS/metatensor_system_kokkos.cpp | 1 -
 src/KOKKOS/pair_metatensor_kokkos.cpp   | 5 ++++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/KOKKOS/metatensor_system_kokkos.cpp b/src/KOKKOS/metatensor_system_kokkos.cpp
index 8eccab9c71c..5b5daac72dc 100644
--- a/src/KOKKOS/metatensor_system_kokkos.cpp
+++ b/src/KOKKOS/metatensor_system_kokkos.cpp
@@ -28,7 +28,6 @@
 #include "atom_kokkos.h"
 
 #include <torch/cuda.h>
-#include <chrono>
 
 #ifndef KOKKOS_ENABLE_CUDA
 // fake Kokkos::Cuda for non-CUDA builds
diff --git a/src/KOKKOS/pair_metatensor_kokkos.cpp b/src/KOKKOS/pair_metatensor_kokkos.cpp
index 3fd275f9a5f..611d0529396 100644
--- a/src/KOKKOS/pair_metatensor_kokkos.cpp
+++ b/src/KOKKOS/pair_metatensor_kokkos.cpp
@@ -47,7 +47,10 @@
 
 #include "metatensor_system_kokkos.h"
 
-#include <chrono>
+// LAMMPS uses `LAMMPS_NS::tagint` and `int` for tags and neighbor lists, respectively.
+// For the moment, we require both to be int32_t for this interface
+static_assert(std::is_same_v<LAMMPS_NS::tagint, int32_t>, "Error: LAMMPS_NS::tagint must be int32_t to compile metatensor/kk");
+static_assert(std::is_same_v<int, int32_t>, "Error: int must be int32_t to compile metatensor/kk");
 
 #ifndef KOKKOS_ENABLE_CUDA
 namespace Kokkos {