From 5828f401068efe0843562c8a3574e65ad3eacd35 Mon Sep 17 00:00:00 2001 From: Mark Logan <103447440+mystenmark@users.noreply.github.com> Date: Thu, 27 Feb 2025 21:02:34 -0800 Subject: [PATCH] Delay delete of deferred transactions until end of commit handler (#21376) This fixes a bug in how we handle the end of epoch pre- and post- data quarantining. In the old code, we check if there were any deferred txns loaded in the current commit with this code: https://github.com/MystenLabs/sui/blob/2f52a7283e5f805b728ce1570cb66d27f0a95648/crates/sui-core/src/authority/authority_per_epoch_store.rs#L2150 Because this looks at the table, it does not reflect that the fact that we may have already scheduled deletes from the table. In the new code, we look at the in-memory map https://github.com/MystenLabs/sui/blob/67fea6a41b90e6b49d9829766a29b4f74cfa4900/crates/sui-core/src/authority/authority_per_epoch_store.rs#L2038 - keys are removed from this map immediately upon loading them at https://github.com/MystenLabs/sui/blob/67fea6a41b90e6b49d9829766a29b4f74cfa4900/crates/sui-core/src/authority/authority_per_epoch_store.rs#L1884 This in turn can cause us to improperly skip drop randomness transactions at the end of the epoch here: https://github.com/MystenLabs/sui/blob/67fea6a41b90e6b49d9829766a29b4f74cfa4900/crates/sui-core/src/authority/authority_per_epoch_store.rs#L3594 --- .../authority/authority_per_epoch_store.rs | 41 +++++++++++-------- .../src/authority/consensus_quarantine.rs | 4 ++ 2 files changed, 29 insertions(+), 16 deletions(-) diff --git a/crates/sui-core/src/authority/authority_per_epoch_store.rs b/crates/sui-core/src/authority/authority_per_epoch_store.rs index 2f4bb0286e294..67289c865539f 100644 --- a/crates/sui-core/src/authority/authority_per_epoch_store.rs +++ b/crates/sui-core/src/authority/authority_per_epoch_store.rs @@ -1938,20 +1938,25 @@ impl AuthorityPerEpochStore { max: DeferralKey, ) -> SuiResult)>> { debug!("Query epoch store to load deferred txn {:?} {:?}", min, max); - let mut keys = Vec::new(); - let mut txns = Vec::new(); - let mut deferred_transactions = self.consensus_output_cache.deferred_transactions.lock(); + let (keys, txns) = { + let mut keys = Vec::new(); + let mut txns = Vec::new(); - for (key, transactions) in deferred_transactions.range(min..max) { - debug!( - "Loaded {:?} deferred txn with deferral key {:?}", - transactions.len(), - key - ); - keys.push(*key); - txns.push((*key, transactions.clone())); - } + let deferred_transactions = self.consensus_output_cache.deferred_transactions.lock(); + + for (key, transactions) in deferred_transactions.range(min..max) { + debug!( + "Loaded {:?} deferred txn with deferral key {:?}", + transactions.len(), + key + ); + keys.push(*key); + txns.push((*key, transactions.clone())); + } + + (keys, txns) + }; // verify that there are no duplicates - should be impossible due to // is_consensus_message_processed @@ -1965,10 +1970,6 @@ impl AuthorityPerEpochStore { } } - for key in &keys { - deferred_transactions.remove(key); - } - output.delete_loaded_deferred_transactions(&keys); Ok(txns) @@ -3162,6 +3163,14 @@ impl AuthorityPerEpochStore { } } + { + let mut deferred_transactions = + self.consensus_output_cache.deferred_transactions.lock(); + for deleted_deferred_key in output.get_deleted_deferred_txn_keys() { + deferred_transactions.remove(&deleted_deferred_key); + } + } + self.consensus_quarantine .write() .push_consensus_output(output, self)?; diff --git a/crates/sui-core/src/authority/consensus_quarantine.rs b/crates/sui-core/src/authority/consensus_quarantine.rs index 0b8532a217fe4..c11e4cd0c5561 100644 --- a/crates/sui-core/src/authority/consensus_quarantine.rs +++ b/crates/sui-core/src/authority/consensus_quarantine.rs @@ -103,6 +103,10 @@ impl ConsensusCommitOutput { } } + pub fn get_deleted_deferred_txn_keys(&self) -> impl Iterator + use<'_> { + self.deleted_deferred_txns.iter().cloned() + } + fn get_randomness_last_round_timestamp(&self) -> Option { self.next_randomness_round.as_ref().map(|(_, ts)| *ts) }