Skip to content

Commit

Permalink
repeat let update until all halo cells accounted for
Browse files Browse the repository at this point in the history
  • Loading branch information
sekelle committed Aug 6, 2024
1 parent 3c3fc36 commit 26cbbc2
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 51 deletions.
72 changes: 44 additions & 28 deletions domain/include/cstone/domain/domain.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -217,15 +217,18 @@ class Domain
float invThetaEff = invThetaMinMac(theta_);
std::vector<int> peers = findPeersMac(myRank_, global_.assignment(), global_.octree(), box(), invThetaEff);

if (firstCall_)
if (firstCall_ || convergeTrees)
{
focusTree_.converge(box(), keyView, peers, global_.assignment(), global_.treeLeaves(), global_.nodeCounts(),
invThetaEff, std::get<0>(scratch));
}
focusTree_.updateMinMac(box(), global_.assignment(), invThetaEff);
focusTree_.updateTree(peers, global_.assignment());
focusTree_.updateCounts(keyView, global_.treeLeaves(), global_.nodeCounts(), std::get<0>(scratch));
focusTree_.updateGeoCenters(box());
else
{
focusTree_.updateMinMac(box(), global_.assignment(), invThetaEff);
focusTree_.updateTree(peers, global_.assignment());
focusTree_.updateCounts(keyView, global_.treeLeaves(), global_.nodeCounts(), std::get<0>(scratch));
focusTree_.updateGeoCenters(box());
}

auto octreeView = focusTree_.octreeViewAcc();
const KeyType* focusLeaves = focusTree_.treeLeavesAcc().data();
Expand Down Expand Up @@ -264,10 +267,7 @@ class Domain
gatherArrays(sorter.gatherFunc(), sorter.getMap() + global_.numSendDown(), global_.numAssigned(), exchangeStart,
0, std::tie(x, y, z, h, m), util::reverse(scratch));

// invThetaEff needs to be more strict than the worst case vec mac, because if the tree is reused multiple
// times, an expansion center might have moved outside the cell, which can mark remote cells as halos on ranks
// that won't be found by findPeers
float invThetaEff = invThetaVecMac(theta_) * haloSearchExt_;
float invThetaEff = invThetaVecMac(theta_);
std::vector<int> peers = findPeersMac(myRank_, global_.assignment(), global_.octree(), box(), invThetaEff);

if (firstCall_)
Expand All @@ -288,26 +288,38 @@ class Domain
reps++;
}
}
// update the tree using expansion centers from the previous iteration, but 5% stricter to account for
// possibility of the exp. center having moved closer to the source after timestep integration
focusTree_.updateMacs(box(), global_.assignment(), 1.05 / theta_);
focusTree_.updateTree(peers, global_.assignment());
focusTree_.updateCounts(keyView, global_.treeLeaves(), global_.nodeCounts(), std::get<0>(scratch));
focusTree_.updateCenters(rawPtr(x), rawPtr(y), rawPtr(z), rawPtr(m), global_.octree(), box(),
std::get<0>(scratch), std::get<1>(scratch));
focusTree_.updateMacs(box(), global_.assignment(), 1.0 / theta_);
focusTree_.updateGeoCenters(box());

auto octreeView = focusTree_.octreeViewAcc();
const KeyType* focusLeaves = focusTree_.treeLeavesAcc().data();

reallocateDestructive(layout_, octreeView.numLeafNodes + 1, allocGrowthRate_);
reallocateDestructive(layoutAcc_, octreeView.numLeafNodes + 1, allocGrowthRate_);
halos_.discover(octreeView.prefixes, octreeView.childOffsets, octreeView.internalToLeaf, focusLeaves,
focusTree_.leafCountsAcc(), focusTree_.assignment(), {rawPtr(layoutAcc_), layoutAcc_.size()},
box(), rawPtr(h), haloSearchExt_, std::get<0>(scratch));
focusTree_.addMacs(halos_.haloFlags());
halos_.computeLayout(focusTree_.treeLeaves(), focusTree_.leafCounts(), focusTree_.assignment(), peers, layout_);
int fail = 0;
do
{
focusTree_.updateMacs(box(), global_.assignment(), centerDriftTol_ / theta_);
focusTree_.updateTree(peers, global_.assignment());
focusTree_.updateCounts(keyView, global_.treeLeaves(), global_.nodeCounts(), std::get<0>(scratch));
focusTree_.updateCenters(rawPtr(x), rawPtr(y), rawPtr(z), rawPtr(m), global_.octree(), box(),
std::get<0>(scratch), std::get<1>(scratch));
focusTree_.updateMacs(box(), global_.assignment(), 1.0 / theta_);
focusTree_.updateGeoCenters(box());

auto octreeView = focusTree_.octreeViewAcc();
const KeyType* focusLeaves = focusTree_.treeLeavesAcc().data();

reallocateDestructive(layout_, octreeView.numLeafNodes + 1, allocGrowthRate_);
reallocateDestructive(layoutAcc_, octreeView.numLeafNodes + 1, allocGrowthRate_);
halos_.discover(octreeView.prefixes, octreeView.childOffsets, octreeView.internalToLeaf, focusLeaves,
focusTree_.leafCountsAcc(), focusTree_.assignment(),
{rawPtr(layoutAcc_), layoutAcc_.size()}, box(), rawPtr(h), haloSearchExt_,
std::get<0>(scratch));
focusTree_.addMacs(halos_.haloFlags());
fail = halos_.computeLayout(focusTree_.treeLeaves(), focusTree_.leafCounts(), focusTree_.assignment(),
peers, layout_);
MPI_Allreduce(MPI_IN_PLACE, &fail, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

if (fail)
{
centerDriftTol_ += 0.05;
if (myRank_ == 0) { std::cout << "Increased centerDriftTol to " << centerDriftTol_ << std::endl; }
}
} while (fail);

// diagnostics(keyView.size(), peers);

Expand Down Expand Up @@ -401,6 +413,7 @@ class Domain
//! @brief return the coordinate bounding box from the previous sync call
const Box<T>& box() const { return global_.box(); }

void setTreeConv(bool flag) { convergeTrees = flag; }
void setHaloFactor(float factor) { haloSearchExt_ = factor; }
void setGrowthAllocRate(float factor) { allocGrowthRate_ = factor; }

Expand Down Expand Up @@ -645,8 +658,11 @@ class Domain
//! @brief MAC parameter for focus resolution and gravity treewalk
float theta_;

bool convergeTrees{false};
//! @brief Extra search factor for halo discovery, allowing multiple time integration steps between sync() calls
float haloSearchExt_{1.0};
//! @brief factor to tighten theta to avoid failed macs by remote cells due to centers having moved closer
float centerDriftTol_{1.05};
//! @brief buffer growth rate when reallocating
float allocGrowthRate_{1.05};

Expand Down
53 changes: 30 additions & 23 deletions domain/include/cstone/halos/halos.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,14 +70,19 @@ void checkIndices(const SendList& sendList,
}

//! @brief check halo discovery for sanity
void checkHalos(int myRank, gsl::span<const TreeIndexPair> focusAssignment, gsl::span<const int> haloFlags)
template<class KeyType>
int checkHalos(int myRank,
gsl::span<const TreeIndexPair> focusAssignment,
gsl::span<const int> haloFlags,
gsl::span<const KeyType> ftree)
{
TreeNodeIndex firstAssignedNode = focusAssignment[myRank].start();
TreeNodeIndex lastAssignedNode = focusAssignment[myRank].end();

std::array<TreeNodeIndex, 2> checkRanges[2] = {{0, firstAssignedNode},
{lastAssignedNode, TreeNodeIndex(haloFlags.size())}};

int ret = 0;
for (int range = 0; range < 2; ++range)
{
#pragma omp parallel for
Expand All @@ -92,16 +97,16 @@ void checkHalos(int myRank, gsl::span<const TreeIndexPair> focusAssignment, gsl:
}
if (!peerFound)
{
std::cout << "Detected halo cells not belonging to peer ranks. This usually happens"
<< " when some particles have smoothing length interaction sphere volumes"
<< " of similar magnitude than the rank domain volume. In that case, either"
<< " the number of ranks needs to be decreased or the number of particles"
<< " increased, leading to shorter smoothing lengths\n";
MPI_Abort(MPI_COMM_WORLD, 35);
std::cout << "Assignment rank " << myRank << " " << std::oct << ftree[firstAssignedNode] << " - "
<< ftree[lastAssignedNode] << std::dec << std::endl;
std::cout << "Failed node " << i << " " << std::oct << ftree[i] << " - " << ftree[i + 1] << std::dec
<< std::endl;
ret = 1;
}
}
}
}
return ret;
}

} // namespace detail
Expand Down Expand Up @@ -197,33 +202,35 @@ class Halos

/*! @brief Compute particle offsets of each tree node and determine halo send/receive indices
*
* @param[in] leaves (focus) tree leaves
* @param[in] counts (focus) tree counts
* @param[in] assignment assignment of @p leaves to ranks
* @param[in] peers list of peer ranks
* @param[out] layout Particle offsets for each node in @p leaves w.r.t to the final particle buffers,
* including the halos, length = counts.size() + 1. The last element contains
* the total number of locally present particles, i.e. assigned + halos.
* [layout[i]:layout[i+1]] indexes particles in the i-th leaf cell.
* If the i-th cell is not a halo and not locally owned, its particles are not present
* and the corresponding layout range has length zero.
* @param[in] leaves (focus) tree leaves
* @param[in] counts (focus) tree counts
* @param[in] assignment assignment of @p leaves to ranks
* @param[in] peers list of peer ranks
* @param[out] layout Particle offsets for each node in @p leaves w.r.t to the final particle buffers,
* including the halos, length = counts.size() + 1. The last element contains
* the total number of locally present particles, i.e. assigned + halos.
* [layout[i]:layout[i+1]] indexes particles in the i-th leaf cell.
* If the i-th cell is not a halo and not locally owned, its particles are not present
* and the corresponding layout range has length zero.
* @return 0 if all halo cells have been matched with a peer rank, 1 otherwise
*/
void computeLayout(gsl::span<const KeyType> leaves,
gsl::span<const unsigned> counts,
gsl::span<const TreeIndexPair> assignment,
gsl::span<const int> peers,
gsl::span<LocalIndex> layout)
int computeLayout(gsl::span<const KeyType> leaves,
gsl::span<const unsigned> counts,
gsl::span<const TreeIndexPair> assignment,
gsl::span<const int> peers,
gsl::span<LocalIndex> layout)
{
computeNodeLayout(counts, haloFlags_, assignment[myRank_].start(), assignment[myRank_].end(), layout);
auto newParticleStart = layout[assignment[myRank_].start()];
auto newParticleEnd = layout[assignment[myRank_].end()];

outgoingHaloIndices_ = exchangeRequestKeys<KeyType>(leaves, haloFlags_, assignment, peers, layout);

detail::checkHalos(myRank_, assignment, haloFlags_);
if (detail::checkHalos(myRank_, assignment, haloFlags_, leaves)) { return 1; }
detail::checkIndices(outgoingHaloIndices_, newParticleStart, newParticleEnd, layout.back());

incomingHaloIndices_ = computeHaloReceiveList(layout, haloFlags_, assignment, peers);
return 0;
}

/*! @brief repeat the halo exchange pattern from the previous sync operation for a different set of arrays
Expand Down
2 changes: 2 additions & 0 deletions main/src/propagator/ve_hydro_bdt.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,9 @@ class HydroVeBdtProp : public Propagator<DomainType, DataType>

void sync(DomainType& domain, DataType& simData) override
{
domain.setTreeConv(true);
domain.setHaloFactor(1.0 + float(timestep_.numRungs) / 40);

if (activeRung(timestep_.substep, timestep_.numRungs) == 0) { fullSync(domain, simData); }
else { partialSync(domain, simData); }
}
Expand Down

0 comments on commit 26cbbc2

Please sign in to comment.