diff --git a/include/cutlass/epilogue/threadblock/epilogue_with_broadcast.h b/include/cutlass/epilogue/threadblock/epilogue_with_broadcast.h index 7a97e0cb32..6b7351cfd1 100644 --- a/include/cutlass/epilogue/threadblock/epilogue_with_broadcast.h +++ b/include/cutlass/epilogue/threadblock/epilogue_with_broadcast.h @@ -946,13 +946,13 @@ class EpilogueWithBroadcast< // if (OutputOp::kStoreZ) { + destination_iterator += reduce_fragment_idx; destination_iterator.store(frag_Z); - ++destination_iterator; } if (OutputOp::kStoreT) { + tensor_iterator += reduce_fragment_idx; tensor_iterator.store(frag_T); - ++tensor_iterator; } } }; @@ -1698,13 +1698,13 @@ class EpilogueWithBroadcast< // if (OutputOp::kStoreZ) { + destination_iterator += reduce_fragment_idx; destination_iterator.store(frag_Z); - ++destination_iterator; } if (OutputOp::kStoreT) { + tensor_iterator += reduce_fragment_idx; tensor_iterator.store(frag_T); - ++tensor_iterator; } } };