-
Notifications
You must be signed in to change notification settings - Fork 1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
refactor: simplify journal and restore streamer cancellation NOT READY FOR REVIEW #4549
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,7 +42,7 @@ JournalStreamer::JournalStreamer(journal::Journal* journal, Context* cntx) | |
} | ||
|
||
JournalStreamer::~JournalStreamer() { | ||
if (!cntx_->IsCancelled()) { | ||
if (!cntx_->GetError()) { | ||
DCHECK_EQ(in_flight_bytes_, 0u); | ||
} | ||
VLOG(1) << "~JournalStreamer"; | ||
|
@@ -83,7 +83,7 @@ void JournalStreamer::Cancel() { | |
VLOG(1) << "JournalStreamer::Cancel"; | ||
waker_.notifyAll(); | ||
journal_->UnregisterOnChange(journal_cb_id_); | ||
if (!cntx_->IsCancelled()) { | ||
if (!cntx_->GetError()) { | ||
WaitForInflightToComplete(); | ||
} | ||
} | ||
|
@@ -134,10 +134,12 @@ void JournalStreamer::OnCompletion(std::error_code ec, size_t len) { | |
DVLOG(3) << "Completing " << in_flight_bytes_; | ||
in_flight_bytes_ = 0; | ||
pending_buf_.Pop(); | ||
if (ec && !IsStopped()) { | ||
cntx_->ReportError(ec); | ||
} else if (!pending_buf_.Empty() && !IsStopped()) { | ||
AsyncWrite(); | ||
if (!cntx_->IsCancelled()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we care loosing the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we don't care regarding error if we cancel streamer |
||
if (ec) { | ||
cntx_->ReportError(ec); | ||
} else if (!pending_buf_.Empty()) { | ||
AsyncWrite(); | ||
} | ||
} | ||
|
||
// notify ThrottleIfNeeded or WaitForInflightToComplete that waits | ||
|
@@ -149,7 +151,7 @@ void JournalStreamer::OnCompletion(std::error_code ec, size_t len) { | |
} | ||
|
||
void JournalStreamer::ThrottleIfNeeded() { | ||
if (IsStopped() || !IsStalled()) | ||
if (cntx_->IsCancelled() || !IsStalled()) | ||
return; | ||
|
||
auto next = | ||
|
@@ -158,7 +160,7 @@ void JournalStreamer::ThrottleIfNeeded() { | |
size_t sent_start = total_sent_; | ||
|
||
std::cv_status status = | ||
waker_.await_until([this]() { return !IsStalled() || IsStopped(); }, next); | ||
waker_.await_until([this]() { return !IsStalled() || cntx_->IsCancelled(); }, next); | ||
if (status == std::cv_status::timeout) { | ||
LOG(WARNING) << "Stream timed out, inflight bytes/sent start: " << inflight_start << "/" | ||
<< sent_start << ", end: " << in_flight_bytes_ << "/" << total_sent_; | ||
|
@@ -188,7 +190,7 @@ RestoreStreamer::RestoreStreamer(DbSlice* slice, cluster::SlotSet slots, journal | |
} | ||
|
||
void RestoreStreamer::Start(util::FiberSocketBase* dest, bool send_lsn) { | ||
if (fiber_cancelled_) | ||
if (cntx_->IsCancelled()) | ||
return; | ||
|
||
VLOG(1) << "RestoreStreamer start"; | ||
|
@@ -206,16 +208,16 @@ void RestoreStreamer::Run() { | |
PrimeTable* pt = &db_array_[0]->prime; | ||
|
||
do { | ||
if (fiber_cancelled_) | ||
if (cntx_->IsCancelled()) | ||
return; | ||
cursor = pt->TraverseBuckets(cursor, [&](PrimeTable::bucket_iterator it) { | ||
if (fiber_cancelled_) // Could be cancelled any time as Traverse may preempt | ||
if (cntx_->IsCancelled()) // Could be cancelled any time as Traverse may preempt | ||
return; | ||
|
||
db_slice_->FlushChangeToEarlierCallbacks(0 /*db_id always 0 for cluster*/, | ||
DbSlice::Iterator::FromPrime(it), snapshot_version_); | ||
|
||
if (fiber_cancelled_) // Could have been cancelled in above call too | ||
if (cntx_->IsCancelled()) // Could have been cancelled in above call too | ||
return; | ||
|
||
std::lock_guard guard(big_value_mu_); | ||
|
@@ -231,7 +233,7 @@ void RestoreStreamer::Run() { | |
ThisFiber::Yield(); | ||
last_yield = 0; | ||
} | ||
} while (cursor && !fiber_cancelled_); | ||
} while (cursor); | ||
|
||
VLOG(1) << "RestoreStreamer finished loop of " << my_slots_.ToSlotRanges().ToString() | ||
<< ", shard " << db_slice_->shard_id() << ". Buckets looped " << stats_.buckets_loop; | ||
|
@@ -252,8 +254,7 @@ void RestoreStreamer::SendFinalize(long attempt) { | |
writer.Write(entry); | ||
Write(std::move(sink).str()); | ||
|
||
// TODO: is the intent here to flush everything? | ||
// | ||
// DFLYMIGRATE ACK command has a timeout so we want to send it only when LSN is ready to be sent | ||
ThrottleIfNeeded(); | ||
} | ||
|
||
|
@@ -263,7 +264,7 @@ RestoreStreamer::~RestoreStreamer() { | |
void RestoreStreamer::Cancel() { | ||
auto sver = snapshot_version_; | ||
snapshot_version_ = 0; // to prevent double cancel in another fiber | ||
fiber_cancelled_ = true; | ||
cntx_->Cancel(); | ||
if (sver != 0) { | ||
db_slice_->UnregisterOnChange(sver); | ||
JournalStreamer::Cancel(); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,7 +47,7 @@ class JournalStreamer { | |
void ThrottleIfNeeded(); | ||
|
||
virtual bool ShouldWrite(const journal::JournalItem& item) const { | ||
return !IsStopped(); | ||
return !cntx_->IsCancelled(); | ||
} | ||
|
||
void WaitForInflightToComplete(); | ||
|
@@ -59,10 +59,6 @@ class JournalStreamer { | |
void AsyncWrite(); | ||
void OnCompletion(std::error_code ec, size_t len); | ||
|
||
bool IsStopped() const { | ||
return cntx_->IsCancelled(); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! |
||
} | ||
|
||
bool IsStalled() const; | ||
|
||
journal::Journal* journal_; | ||
|
@@ -91,10 +87,6 @@ class RestoreStreamer : public JournalStreamer { | |
|
||
void SendFinalize(long attempt); | ||
|
||
bool IsSnapshotFinished() const { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dead function! nice! |
||
return snapshot_finished_; | ||
} | ||
|
||
private: | ||
void OnDbChange(DbIndex db_index, const DbSlice::ChangeReq& req); | ||
bool ShouldWrite(const journal::JournalItem& item) const override; | ||
|
@@ -121,8 +113,7 @@ class RestoreStreamer : public JournalStreamer { | |
DbTableArray db_array_; | ||
uint64_t snapshot_version_ = 0; | ||
cluster::SlotSet my_slots_; | ||
bool fiber_cancelled_ = false; | ||
bool snapshot_finished_ = false; | ||
|
||
ThreadLocalMutex big_value_mu_; | ||
Stats stats_; | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why
GetError()
here and notIsCancelled()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
because if we cancel the streamer, we should wait for WaitForInflightToComplete() and in_flight_bytes_ should be 0
otherwise not
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But it's the exact same thing? We can cancel a context in two ways:
Contenxt::Cancel
which submits the error codeoperation_cancel
to the underline error handler via theReportError
ReportError
which submits the error code passed as an argument to the underline error handlerBoth (1) and (2) call
Cancellation::Cancel
soIsCancelled()
will always be true for both paths.Why do we need to fetch the error here via
GetError
and why does not!cntx_->IsCancelled()
is not enough in this context ?I could be missing something -- I just want to make sure I understand this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thx @kostasrim you are right. Honestly, such context interface doesn't look logical to me and I thought the functions are different. your comment helps me