Skip to content

Commit

Permalink
[cherry-pick] aggressive backpressure changes (#11345 and #11550) (#1…
Browse files Browse the repository at this point in the history
…1552)

* aggressive backpressure changes (#11345)

* Add min gap to fallback to statesync (#11550)

---------
  • Loading branch information
igor-aptos authored Jan 4, 2024
1 parent 1d56443 commit 706a413
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 16 deletions.
48 changes: 34 additions & 14 deletions config/src/config/consensus_config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,42 +179,62 @@ impl Default for ConsensusConfig {

// Voting backpressure is only used as a backup, to make sure pending rounds don't
// increase uncontrollably, and we know when to go to state sync.
vote_back_pressure_limit: 30,
// Since the block gas limit and pipeline backpressure should keep the number of blocks
// in the pipeline very low, we can keep this limit pretty low, too.
vote_back_pressure_limit: 7,
pipeline_backpressure: vec![
PipelineBackpressureValues {
back_pressure_pipeline_latency_limit_ms: 1000,
// pipeline_latency measures how long the oldest block still in the pipeline
// has been in the pipeline.
// A block enters the pipeline after consensus orders it, and leaves the
// pipeline once quorum on execution result among validators has been reached
// (so-(badly)-called "commit certificate"), meaning 2f+1 validators have finished execution.
back_pressure_pipeline_latency_limit_ms: 800,
max_sending_block_txns_override: 10000,
max_sending_block_bytes_override: 5 * 1024 * 1024,
backpressure_proposal_delay_ms: 100,
},
PipelineBackpressureValues {
back_pressure_pipeline_latency_limit_ms: 1500,
back_pressure_pipeline_latency_limit_ms: 1100,
max_sending_block_txns_override: 10000,
max_sending_block_bytes_override: 5 * 1024 * 1024,
backpressure_proposal_delay_ms: 200,
},
PipelineBackpressureValues {
back_pressure_pipeline_latency_limit_ms: 2000,
max_sending_block_txns_override: 10000,
max_sending_block_bytes_override: 5 * 1024 * 1024,
back_pressure_pipeline_latency_limit_ms: 1400,
max_sending_block_txns_override: 2000,
max_sending_block_bytes_override: 1024 * 1024 + BATCH_PADDING_BYTES as u64,
backpressure_proposal_delay_ms: 300,
},
PipelineBackpressureValues {
back_pressure_pipeline_latency_limit_ms: 2500,
max_sending_block_txns_override: 2000,
back_pressure_pipeline_latency_limit_ms: 1700,
max_sending_block_txns_override: 1000,
max_sending_block_bytes_override: 1024 * 1024 + BATCH_PADDING_BYTES as u64,
backpressure_proposal_delay_ms: 300,
backpressure_proposal_delay_ms: 400,
},
PipelineBackpressureValues {
back_pressure_pipeline_latency_limit_ms: 2000,
max_sending_block_txns_override: 600,
max_sending_block_bytes_override: 1024 * 1024 + BATCH_PADDING_BYTES as u64,
backpressure_proposal_delay_ms: 500,
},
PipelineBackpressureValues {
back_pressure_pipeline_latency_limit_ms: 2300,
max_sending_block_txns_override: 400,
max_sending_block_bytes_override: 1024 * 1024 + BATCH_PADDING_BYTES as u64,
backpressure_proposal_delay_ms: 500,
},
PipelineBackpressureValues {
back_pressure_pipeline_latency_limit_ms: 4000,
back_pressure_pipeline_latency_limit_ms: 2600,
// in practice, latencies and delay make it such that ~2 blocks/s is max,
// meaning that most aggressively we limit to ~1000 TPS
// meaning that most aggressively we limit to ~500 TPS
// For transactions that are more expensive than that, we should
// instead rely on max gas per block to limit latency
max_sending_block_txns_override: 500,
// instead rely on max gas per block to limit latency.
// We cannot reduce this further currently, as it needs to be larger than batch size.
max_sending_block_txns_override: 250,
// stop reducing size, so 1MB transactions can still go through
max_sending_block_bytes_override: 1024 * 1024 + BATCH_PADDING_BYTES as u64,
backpressure_proposal_delay_ms: 300,
backpressure_proposal_delay_ms: 500,
},
],
window_for_chain_health: 100,
Expand Down
2 changes: 1 addition & 1 deletion consensus/src/block_storage/block_store_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ async fn test_need_sync_for_ledger_info() {
assert!(block_store.need_sync_for_ledger_info(&ordered_too_far));

let committed_round_too_far =
block_store.commit_root().round() + block_store.vote_back_pressure_limit * 2 + 1;
block_store.commit_root().round() + 30.max(block_store.vote_back_pressure_limit * 2) + 1;
let committed_too_far = create_ledger_info(committed_round_too_far);
assert!(block_store.need_sync_for_ledger_info(&committed_too_far));

Expand Down
3 changes: 2 additions & 1 deletion consensus/src/block_storage/sync_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@ impl BlockStore {
/// Check if we're far away from this ledger info and need to sync.
/// This ensures that the block referred by the ledger info is not in buffer manager.
pub fn need_sync_for_ledger_info(&self, li: &LedgerInfoWithSignatures) -> bool {
// TODO move min gap to fallback (30) to config.
(self.ordered_root().round() < li.commit_info().round()
&& !self.block_exists(li.commit_info().id()))
|| self.commit_root().round() + 2 * self.vote_back_pressure_limit
|| self.commit_root().round() + 30.max(2 * self.vote_back_pressure_limit)
< li.commit_info().round()
}

Expand Down
8 changes: 8 additions & 0 deletions consensus/src/counters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,14 @@ pub static LEADER_REPUTATION_ROUND_HISTORY_SIZE: Lazy<IntGauge> = Lazy::new(|| {
.unwrap()
});

/// Counts when consensus vote_backpressure is triggered
///
/// NOTE(review): this is an averaging counter, so callers presumably observe 1.0
/// when vote backpressure delays a proposal and 0.0 otherwise — confirm at the call sites.
pub static CONSENSUS_WITHOLD_VOTE_BACKPRESSURE_TRIGGERED: Lazy<Histogram> = Lazy::new(|| {
register_avg_counter(
"aptos_consensus_withold_vote_backpressure_triggered",
"Counts when consensus vote_backpressure is triggered",
)
});

/// Counts when chain_health backoff is triggered
pub static CHAIN_HEALTH_BACKOFF_TRIGGERED: Lazy<Histogram> = Lazy::new(|| {
register_avg_counter(
Expand Down
2 changes: 2 additions & 0 deletions consensus/src/round_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,7 @@ impl RoundManager {

observe_block(proposal.timestamp_usecs(), BlockStage::SYNCED);
if self.decoupled_execution() && self.block_store.vote_back_pressure() {
counters::CONSENSUS_WITHOLD_VOTE_BACKPRESSURE_TRIGGERED.observe(1.0);
// In case of back pressure, we delay processing proposal. This is done by resending the
// same proposal to self after some time. Even if processing proposal is delayed, we add
// the block to the block store so that we don't need to fetch it from remote once we
Expand All @@ -733,6 +734,7 @@ impl RoundManager {
.await;
Ok(())
} else {
counters::CONSENSUS_WITHOLD_VOTE_BACKPRESSURE_TRIGGERED.observe(0.0);
self.process_verified_proposal(proposal).await
}
}
Expand Down

0 comments on commit 706a413

Please sign in to comment.