Skip to content

Commit

Permalink
fix(copy): polish the implementations. (#49)
Browse files Browse the repository at this point in the history
This PR includes the following minor changes:

1. Moved `sync.hpp` into the copy directory, as it is more closely related to the copy functionality.
2. Made the implementation of `sync.hpp` independent of cutlass.
  • Loading branch information
lcy-seso authored Jan 28, 2025
1 parent 1b922c1 commit e976316
Show file tree
Hide file tree
Showing 5 changed files with 5 additions and 8 deletions.
1 change: 1 addition & 0 deletions include/cell/copy/mod.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,6 @@
#include "cell/copy/global_to_shared.hpp"
#include "cell/copy/register.hpp"
#include "cell/copy/shared_to_register.hpp"
#include "cell/copy/sync.hpp"
#include "cell/copy/vectorize.hpp"
#include "cell/copy/warp.hpp"
6 changes: 3 additions & 3 deletions include/cell/sync.hpp → include/cell/copy/sync.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

#include "cuda_utils.hpp"

namespace tilefusion::cell {
namespace tilefusion::cell::copy {

template <int N>
DEVICE void wait_group() {
Expand All @@ -16,12 +16,12 @@ DEVICE void wait_group() {

// Emits a copy-group commit for previously issued asynchronous copies.
// The body is compiled only when CP_ASYNC_SM80_ENABLED is defined; without
// that macro this function has an empty body.
DEVICE void commit_copy_group() {
#if defined(CP_ASYNC_SM80_ENABLED)
// NOTE(review): this is a rendered diff with the +/- markers stripped;
// presumably the cute:: call is the removed (cutlass-dependent) line and the
// inline asm below is its replacement — confirm against the actual file.
cute::cp_async_fence();
// Issues the PTX `cp.async.commit_group` instruction directly via inline asm.
asm volatile("cp.async.commit_group;\n" ::);
#endif
}

// Convenience wrapper: calls commit_copy_group() and then wait_group<0>().
// NOTE(review): presumably wait_group<0>() waits until no committed copy
// groups remain pending — its body is not visible here; confirm.
// NOTE(review): identifiers containing a double underscore are reserved in
// C++; consider a non-reserved name in a follow-up (callers use this name).
DEVICE void __copy_async() {
commit_copy_group();
wait_group<0>();
}
} // namespace tilefusion::cell
} // namespace tilefusion::cell::copy
1 change: 0 additions & 1 deletion include/cell/mod.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

#include "cell/compute/mod.hpp"
#include "cell/copy/mod.hpp"
#include "cell/sync.hpp"
#include "cell/warp.hpp"
#include "traits/base.hpp"
#include "types/mod.hpp"
2 changes: 1 addition & 1 deletion tests/cpp/cell/test_g2s_load.cu
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ __global__ void copy_g2s(const Element* src_ptr, Element* dst_ptr,
SrcTile dst(dst_ptr); // global memory tile

loader(src, inter);
__copy_async();
copy::__copy_async();
__syncthreads();

storer(inter, dst);
Expand Down
3 changes: 0 additions & 3 deletions tests/cpp/cell/test_swizzled_copy.cu
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
// Licensed under the MIT License.

#include "cell/copy/mod.hpp"
#include "cell/sync.hpp"
#include "common/test_utils.hpp"
#include "types/mod.hpp"

Expand All @@ -12,8 +11,6 @@

#include <sstream>

#define DEBUG

namespace tilefusion::testing {
using namespace cell;
using namespace copy;
Expand Down

0 comments on commit e976316

Please sign in to comment.