Interleaved computation with communication in halo exchange #881

Draft: wants to merge 69 commits into base: main

Commits (69):
25243a9  added count to mhp algorithms (Jul 31, 2024)
c950ea3  Merge branch 'main' of https://github.com/oneapi-src/distributed-rang… (Jul 31, 2024)
7eec868  minor fix (Jul 31, 2024)
6090fdc  minor fixes (Jul 31, 2024)
167702d  code review fixes (Aug 2, 2024)
755c896  more code review fixes (Aug 5, 2024)
e98de3b  removed redundant conditional (Aug 16, 2024)
f31b80c  fixes according to pre-commit checks (Aug 25, 2024)
b847d04  Merge branch 'main' of https://github.com/oneapi-src/distributed-rang… (Sep 3, 2024)
06cc78b  Merge branch 'main' of https://github.com/oneapi-src/distributed-rang… (Sep 26, 2024)
5511751  added cyclic_halo_impl and distributed_vector_dual (Nov 11, 2024)
b26665e  Merge pull request #1 from quazuo/count (quazuo, Nov 11, 2024)
e1e9910  added dual_segment and refined dual_distributed_vector (Dec 2, 2024)
bdecda7  progress (Dec 25, 2024)
9e81fd7  Merge remote-tracking branch 'upstream/main' (Dec 25, 2024)
811307c  tiny fix (Dec 25, 2024)
a5fdcf5  Merge branch 'main' of https://github.com/quazuo/distributed-ranges (Dec 25, 2024)
5353689  prog (Dec 25, 2024)
6701c41  prog (Dec 25, 2024)
f79fe45  prog (Dec 25, 2024)
b126f8b  prog (Dec 25, 2024)
e1d50c9  prog (Dec 25, 2024)
0da1f2c  prog (Dec 25, 2024)
871bd58  prog (Dec 27, 2024)
2e3c96d  prog (Dec 27, 2024)
dff502a  prog (Dec 27, 2024)
89f9c18  prog (Dec 27, 2024)
2e4cc88  prog (Dec 27, 2024)
9c37e12  prog (Dec 27, 2024)
78cbd29  prog (Dec 27, 2024)
e18f59a  prog (Dec 27, 2024)
3329c97  prog (Dec 27, 2024)
a0e067d  prog (Dec 27, 2024)
fb05c7d  prog (Dec 27, 2024)
ea43af7  prog (Dec 27, 2024)
4fd4d8f  prog (Dec 27, 2024)
95ca6cf  prog (Dec 27, 2024)
900613e  prog (Dec 27, 2024)
9218cd8  prog (Dec 27, 2024)
46bfa65  prog (Dec 27, 2024)
3d7a9a8  prog (Dec 27, 2024)
2603a6c  prog (Dec 27, 2024)
cdce405  prog (Dec 27, 2024)
46f6ade  prog (Jan 1, 2025)
e4eafa2  prog (Jan 1, 2025)
ad460d8  prog (Jan 1, 2025)
bd1e8ed  prog (Jan 1, 2025)
8d0f5be  prog (Jan 1, 2025)
046b7e4  prog (Jan 1, 2025)
5c32e9e  prog (Jan 1, 2025)
6e95e27  prog (Jan 2, 2025)
46dff8e  prog (Jan 2, 2025)
b842cc7  prog (Jan 2, 2025)
d43b9d3  prog (Jan 2, 2025)
1b6b21b  prog (Jan 2, 2025)
d333564  prog (Jan 2, 2025)
8690c17  prog (Jan 2, 2025)
3860947  prog (Jan 2, 2025)
6dcd2f4  prog (Jan 2, 2025)
df55977  prog (Jan 2, 2025)
4b0f293  prog (Jan 2, 2025)
0fe709c  prog (Jan 2, 2025)
05fa8f6  prog (Jan 7, 2025)
0d94948  prog (Jan 7, 2025)
33a1d4f  prog (Jan 7, 2025)
8dd8a00  prog (Jan 7, 2025)
d04461d  prog (Jan 13, 2025)
497eb8c  prog (Jan 13, 2025)
29759f1  prog (Jan 21, 2025)
1 change: 1 addition & 0 deletions include/dr/mp.hpp
@@ -79,3 +79,4 @@
 #include <dr/mp/algorithms/transpose.hpp>
 #include <dr/mp/containers/distributed_vector.hpp>
 #include <dr/mp/containers/distributed_mdarray.hpp>
+#include <dr/mp/containers/dual_distributed_vector.hpp>
45 changes: 44 additions & 1 deletion include/dr/mp/algorithms/for_each.hpp
@@ -18,7 +18,50 @@
 
 namespace dr::mp {
 
-/// Collective for_each on distributed range
+template <typename R>
+concept dual_vector_range =
+    dr::distributed_range<R> && requires(R &r) { dr::ranges::segments(r)[0].is_compute(); };
+
+void for_each(dual_vector_range auto &&dr, auto op) {
+  partial_for_each(dr, op);
+  partial_for_each(dr, op);
+}
+
+void partial_for_each(dual_vector_range auto &&dr, auto op) {
+  dr::drlog.debug(dr::logger::for_each, "partial_for_each: parallel execution\n");
+  if (rng::empty(dr)) {
+    return;
+  }
+
+  for (auto &s : local_segments(dr)) {
+    if (!s.is_compute()) {
+      s.swap_state();
+      continue;
+    }
+
+    if (mp::use_sycl()) {
+      dr::drlog.debug(" using sycl\n");
+
+      assert(rng::distance(s) > 0);
+#ifdef SYCL_LANGUAGE_VERSION
+      dr::__detail::parallel_for(
+          dr::mp::sycl_queue(), sycl::range<1>(rng::distance(s)),
+          [first = rng::begin(s), op](auto idx) { op(first[idx]); })
+          .wait();
+#else
+      assert(false);
+#endif
+    } else {
+      dr::drlog.debug(" using cpu\n");
+      rng::for_each(s, op);
+    }
+
+    s.swap_state();
+  }
+  barrier();
+}
+
+// Collective for_each on distributed range
 void for_each(dr::distributed_range auto &&dr, auto op) {
   dr::drlog.debug(dr::logger::for_each, "for_each: parallel execution\n");
   if (rng::empty(dr)) {
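Taken together, the new overloads split a collective for_each on a dual vector into two partial passes: each partial_for_each applies op only to the local segments currently in the compute state, flips every segment's state, and ends in a barrier, so the half that is not computing is free to exchange halos concurrently. A hedged usage sketch follows; the container name comes from the mp.hpp diff above, while the constructor signature and the init()/finalize() calls are assumptions modeled on the library's existing distributed_vector usage, not confirmed by this diff:

// Hypothetical sketch only; marked names are assumptions, not PR content.
#include <dr/mp.hpp>

int main() {
  dr::mp::init(); // assumed setup call, as in existing mp examples

  // Container added by this PR; constructor assumed to mirror
  // distributed_vector(size).
  dr::mp::dual_distributed_vector<int> v(1000);

  // One collective call = two partial_for_each passes. In each pass,
  // half of the local segments compute while the others are free to
  // exchange halos; all segments then swap_state().
  dr::mp::for_each(v, [](auto &x) { x = x + 1; });

  dr::mp::finalize(); // assumed teardown call
  return 0;
}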
15 changes: 11 additions & 4 deletions include/dr/mp/containers/distributed_vector.hpp
@@ -254,7 +254,7 @@ template <typename T, class BackendT = MpiBackend> class distributed_vector {
     if (!finalized()) {
       fence();
       if (data_ != nullptr) {
-        backend.deallocate(data_, data_size_ * sizeof(value_type));
+        backend_.deallocate(data_, data_size_ * sizeof(value_type));
       }
 
       delete halo_;
@@ -274,7 +274,14 @@

   auto segments() const { return rng::views::all(segments_); }
 
-  void fence() { backend.fence(); }
+  void fence() { backend_.fence(); }
+
+  backend_type &backend(const std::size_t segment_index) { return backend_; }
+
+  const backend_type &backend(const std::size_t segment_index) const {
+    return backend_;
+  }
 
   T *data(const std::size_t segment_index) { return data_; }
 
 private:
   void init(auto size, auto dist) {

Review comment from the contributor (author), attached to the new backend() accessors: add __attribute__((unused)) to these functions, but I'm unsure this function is really needed.
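For reference, a minimal sketch of that suggestion; __attribute__((unused)) is the GCC/Clang spelling, and applying C++17's [[maybe_unused]] to the otherwise-unused parameter is the portable equivalent. This is illustrative only and not part of the diff:

// Illustrative: silence the unused-parameter warning on the accessor.
backend_type &backend([[maybe_unused]] const std::size_t segment_index) {
  return backend_;
}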
@@ -295,7 +302,7 @@ template <typename T, class BackendT = MpiBackend> class distributed_vector {
     data_size_ = segment_size_ + hb.prev + hb.next;
 
     if (size_ > 0) {
-      data_ = static_cast<T *>(backend.allocate(data_size_ * sizeof(T)));
+      data_ = static_cast<T *>(backend_.allocate(data_size_ * sizeof(T)));
     }
 
     halo_ = new span_halo<T>(default_comm(), data_, data_size_, hb);
@@ -319,7 +326,7 @@ template <typename T, class BackendT = MpiBackend> class distributed_vector {
   distribution distribution_;
   std::size_t size_;
   std::vector<dv_segment<distributed_vector>> segments_;
-  BackendT backend;
+  BackendT backend_;
 };
 
 template <typename T, typename B>
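The dual_vector_range concept in the for_each.hpp diff only requires that a segment expose is_compute(); together with the swap_state() calls in partial_for_each, this implies a two-state segment protocol. The dual_segment internals are not shown in this part of the diff, so the following standalone mock is a purely hypothetical illustration of that protocol:

// Hypothetical mock: only the is_compute()/swap_state() names are taken
// from the diff; everything else here is invented for illustration.
#include <cassert>

struct mock_dual_segment {
  bool compute = true;
  bool is_compute() const { return compute; }
  void swap_state() { compute = !compute; } // compute <-> communicate
};

int main() {
  mock_dual_segment a, b;
  b.swap_state(); // pair the segments in opposite states

  // Pass 1 of partial_for_each: 'a' computes while 'b' is free for
  // halo exchange; both flip afterwards.
  assert(a.is_compute() && !b.is_compute());
  a.swap_state();
  b.swap_state();

  // Pass 2: roles reversed, so two passes cover all segments.
  assert(!a.is_compute() && b.is_compute());
  return 0;
}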