Skip to content

Commit

Permalink
feat: adjust backoff duration to better accommodate CCIP ISM (#5468)
Browse files Browse the repository at this point in the history
### Description

- More frequent retries for the first ~30 mins, meant to accommodate the
upcoming CCIP ISM which is known to take ~25 mins on some origins

After this change:
```
Retry #0: cumulative duration from beginning is 0:0:0, since last attempt is 0:0:0
Retry #1: cumulative duration from beginning is 0:0:0, since last attempt is 0:0:10
Retry #2: cumulative duration from beginning is 0:0:10, since last attempt is 0:0:10
Retry #3: cumulative duration from beginning is 0:0:20, since last attempt is 0:0:10
Retry #4: cumulative duration from beginning is 0:0:30, since last attempt is 0:0:10
Retry #5: cumulative duration from beginning is 0:0:40, since last attempt is 0:0:10
Retry #6: cumulative duration from beginning is 0:0:50, since last attempt is 0:0:10
Retry #7: cumulative duration from beginning is 0:1:0, since last attempt is 0:0:10
Retry #8: cumulative duration from beginning is 0:1:10, since last attempt is 0:0:10
Retry #9: cumulative duration from beginning is 0:1:20, since last attempt is 0:0:10
Retry #10: cumulative duration from beginning is 0:1:30, since last attempt is 0:1:30
Retry #11: cumulative duration from beginning is 0:3:0, since last attempt is 0:1:30
Retry #12: cumulative duration from beginning is 0:4:30, since last attempt is 0:1:30
Retry #13: cumulative duration from beginning is 0:6:0, since last attempt is 0:1:30
Retry #14: cumulative duration from beginning is 0:7:30, since last attempt is 0:1:30
Retry #15: cumulative duration from beginning is 0:9:0, since last attempt is 0:2:0
Retry #16: cumulative duration from beginning is 0:11:0, since last attempt is 0:2:0
Retry #17: cumulative duration from beginning is 0:13:0, since last attempt is 0:2:0
Retry #18: cumulative duration from beginning is 0:15:0, since last attempt is 0:2:0
Retry #19: cumulative duration from beginning is 0:17:0, since last attempt is 0:2:0
Retry #20: cumulative duration from beginning is 0:19:0, since last attempt is 0:2:0
Retry #21: cumulative duration from beginning is 0:21:0, since last attempt is 0:2:0
Retry #22: cumulative duration from beginning is 0:23:0, since last attempt is 0:2:0
Retry #23: cumulative duration from beginning is 0:25:0, since last attempt is 0:2:0
Retry #24: cumulative duration from beginning is 0:27:0, since last attempt is 0:2:0
Retry #25: cumulative duration from beginning is 0:29:0, since last attempt is 0:3:0
Retry #26: cumulative duration from beginning is 0:32:0, since last attempt is 0:4:30
Retry #27: cumulative duration from beginning is 0:36:30, since last attempt is 0:6:0
Retry #28: cumulative duration from beginning is 0:42:30, since last attempt is 0:7:30
Retry #29: cumulative duration from beginning is 0:50:0, since last attempt is 0:9:0
Retry #30: cumulative duration from beginning is 0:59:0, since last attempt is 0:10:30
Retry #31: cumulative duration from beginning is 1:9:30, since last attempt is 0:12:0
Retry #32: cumulative duration from beginning is 1:21:30, since last attempt is 0:13:30
Retry #33: cumulative duration from beginning is 1:35:0, since last attempt is 0:15:0
Retry #34: cumulative duration from beginning is 1:50:0, since last attempt is 0:16:30
Retry #35: cumulative duration from beginning is 2:6:30, since last attempt is 0:18:0
Retry #36: cumulative duration from beginning is 2:24:30, since last attempt is 0:19:30
Retry #37: cumulative duration from beginning is 2:44:0, since last attempt is 0:21:0
Retry #38: cumulative duration from beginning is 3:5:0, since last attempt is 0:22:30
Retry #39: cumulative duration from beginning is 3:27:30, since last attempt is 0:24:0
Retry #40: cumulative duration from beginning is 3:51:30, since last attempt is 0:30:0
Retry #41: cumulative duration from beginning is 4:21:30, since last attempt is 0:30:0
Retry #42: cumulative duration from beginning is 4:51:30, since last attempt is 0:30:0
Retry #43: cumulative duration from beginning is 5:21:30, since last attempt is 0:30:0
Retry #44: cumulative duration from beginning is 5:51:30, since last attempt is 0:30:0
Retry #45: cumulative duration from beginning is 6:21:30, since last attempt is 1:0:0
Retry #46: cumulative duration from beginning is 7:21:30, since last attempt is 1:0:0
Retry #47: cumulative duration from beginning is 8:21:30, since last attempt is 1:0:0
Retry #48: cumulative duration from beginning is 9:21:30, since last attempt is 1:0:0
Retry #49: cumulative duration from beginning is 10:21:30, since last attempt is 1:0:0
Retry #50: cumulative duration from beginning is 11:21:30, since last attempt is 2:38:56
Retry #51: cumulative duration from beginning is 14:0:26, since last attempt is 8:6:55
Retry #52: cumulative duration from beginning is 22:7:21, since last attempt is 8:24:34
Retry #53: cumulative duration from beginning is 30:31:55, since last attempt is 12:32:21
Retry #54: cumulative duration from beginning is 43:4:16, since last attempt is 15:56:31
Retry #55: cumulative duration from beginning is 59:0:47, since last attempt is 15:29:51
Retry #56: cumulative duration from beginning is 74:30:38, since last attempt is 14:13:18
Retry #57: cumulative duration from beginning is 88:43:56, since last attempt is 16:55:33
Retry #58: cumulative duration from beginning is 105:39:29, since last attempt is 22:51:56
Retry #59: cumulative duration from beginning is 128:31:25, since last attempt is 23:20:42
Retry #60: cumulative duration from beginning is 151:52:7, since last attempt is 24:5:0
Retry #61: cumulative duration from beginning is 175:57:7, since last attempt is 26:51:36
Retry #62: cumulative duration from beginning is 202:48:43, since last attempt is 30:55:26
Retry #63: cumulative duration from beginning is 233:44:9, since last attempt is 32:33:46
Retry #64: cumulative duration from beginning is 266:17:55, since last attempt is 33:58:37
Retry #65: cumulative duration from beginning is 300:16:32, since last attempt is 35:31:14
Retry #66: cumulative duration from beginning is 335:47:46, since last attempt is 38:37:22
Retry #67: cumulative duration from beginning is 374:25:8, since last attempt is 36:1:47
Retry #68: cumulative duration from beginning is 410:26:55, since last attempt is 42:42:1
Retry #69: cumulative duration from beginning is 453:8:56, since last attempt is 42:7:4
Retry #70: cumulative duration from beginning is 495:16:0, since last attempt is 47:45:25
Retry #71: cumulative duration from beginning is 543:1:25, since last attempt is 47:20:15
Retry #72: cumulative duration from beginning is 590:21:40, since last attempt is 50:42:29
Retry #73: cumulative duration from beginning is 641:4:9, since last attempt is 50:36:53
Retry #74: cumulative duration from beginning is 691:41:2, since last attempt is 53:25:8
Retry #75: cumulative duration from beginning is 745:6:10, since last attempt is 54:32:25
Retry #76: cumulative duration from beginning is 799:38:35, since last attempt is 55:46:58
Retry #77: cumulative duration from beginning is 855:25:33, since last attempt is 59:21:28
Retry #78: cumulative duration from beginning is 914:47:1, since last attempt is 58:47:29
Retry #79: cumulative duration from beginning is 973:34:30, since last attempt is 63:38:16
Retry #80: cumulative duration from beginning is 1037:12:46, since last attempt is 64:29:15
Retry #81: cumulative duration from beginning is 1101:42:1, since last attempt is 66:44:38
Retry #82: cumulative duration from beginning is 1168:26:39, since last attempt is 67:13:25
Retry #83: cumulative duration from beginning is 1235:40:4, since last attempt is 71:38:16
Retry #84: cumulative duration from beginning is 1307:18:20, since last attempt is 70:59:58
Retry #85: cumulative duration from beginning is 1378:18:18, since last attempt is 73:16:27
Retry #86: cumulative duration from beginning is 1451:34:45, since last attempt is 75:50:17
Retry #87: cumulative duration from beginning is 1527:25:2, since last attempt is 80:23:55
Retry #88: cumulative duration from beginning is 1607:48:57, since last attempt is 81:41:11
Retry #89: cumulative duration from beginning is 1689:30:8, since last attempt is 83:48:29
Retry #90: cumulative duration from beginning is 1773:18:37, since last attempt is 82:24:16
Retry #91: cumulative duration from beginning is 1855:42:53, since last attempt is 88:31:19
Retry #92: cumulative duration from beginning is 1944:14:12, since last attempt is 88:53:21
Retry #93: cumulative duration from beginning is 2033:7:33, since last attempt is 91:55:18
Retry #94: cumulative duration from beginning is 2125:2:51, since last attempt is 90:38:51
Retry #95: cumulative duration from beginning is 2215:41:42, since last attempt is 96:53:4
Retry #96: cumulative duration from beginning is 2312:34:46, since last attempt is 96:9:40
Retry #97: cumulative duration from beginning is 2408:44:26, since last attempt is 99:26:42
Retry #98: cumulative duration from beginning is 2508:11:8, since last attempt is 98:48:58
Retry #99: cumulative duration from beginning is 2607:0:6, since last attempt is 102:43:45
```

Before:
```
0: cumulative duration from beginning is 0:0:0, since last attempt is 0:0:0
1: cumulative duration from beginning is 0:0:0, since last attempt is 0:0:10
2: cumulative duration from beginning is 0:0:10, since last attempt is 0:0:10
3: cumulative duration from beginning is 0:0:20, since last attempt is 0:0:10
4: cumulative duration from beginning is 0:0:30, since last attempt is 0:0:10
5: cumulative duration from beginning is 0:0:40, since last attempt is 0:0:10
6: cumulative duration from beginning is 0:0:50, since last attempt is 0:0:10
7: cumulative duration from beginning is 0:1:0, since last attempt is 0:0:10
8: cumulative duration from beginning is 0:1:10, since last attempt is 0:0:10
9: cumulative duration from beginning is 0:1:20, since last attempt is 0:0:10
10: cumulative duration from beginning is 0:1:30, since last attempt is 0:0:10
11: cumulative duration from beginning is 0:1:40, since last attempt is 0:0:10
12: cumulative duration from beginning is 0:1:50, since last attempt is 0:1:30
13: cumulative duration from beginning is 0:3:20, since last attempt is 0:3:0
14: cumulative duration from beginning is 0:6:20, since last attempt is 0:4:30
15: cumulative duration from beginning is 0:10:50, since last attempt is 0:6:0
16: cumulative duration from beginning is 0:16:50, since last attempt is 0:7:30
17: cumulative duration from beginning is 0:24:20, since last attempt is 0:9:0
18: cumulative duration from beginning is 0:33:20, since last attempt is 0:10:30
19: cumulative duration from beginning is 0:43:50, since last attempt is 0:12:0
20: cumulative duration from beginning is 0:55:50, since last attempt is 0:13:30
21: cumulative duration from beginning is 1:9:20, since last attempt is 0:15:0
22: cumulative duration from beginning is 1:24:20, since last attempt is 0:16:30
23: cumulative duration from beginning is 1:40:50, since last attempt is 0:18:0
24: cumulative duration from beginning is 1:58:50, since last attempt is 0:30:0
25: cumulative duration from beginning is 2:28:50, since last attempt is 0:30:0
26: cumulative duration from beginning is 2:58:50, since last attempt is 0:30:0
27: cumulative duration from beginning is 3:28:50, since last attempt is 0:30:0
28: cumulative duration from beginning is 3:58:50, since last attempt is 0:30:0
29: cumulative duration from beginning is 4:28:50, since last attempt is 0:30:0
```

### Drive-by changes

<!--
Are there any minor or drive-by changes also included?
-->

### Related issues

<!--
- Fixes #[issue number here]
-->

### Backward compatibility

<!--
Are these changes backward compatible? Are there any infrastructure
implications, e.g. changes that would prohibit deploying older commits
using this infra tooling?

Yes/No
-->

### Testing

<!--
What kind of testing have these changes undergone?

None/Manual/Unit Tests
-->
  • Loading branch information
tkporter authored Feb 13, 2025
1 parent 21edc3a commit 214a8d7
Showing 1 changed file with 53 additions and 12 deletions.
65 changes: 53 additions & 12 deletions rust/main/agents/relayer/src/msg/pending_message.rs
Original file line number Diff line number Diff line change
Expand Up @@ -653,20 +653,21 @@ impl PendingMessage {
pub(crate) fn calculate_msg_backoff(num_retries: u32) -> Option<Duration> {
Some(Duration::from_secs(match num_retries {
i if i < 1 => return None,
// wait 10s for the first few attempts; this prevents thrashing
i if (1..12).contains(&i) => 10,
// wait 90s to 19.5min with a linear increase
i if (12..24).contains(&i) => (i as u64 - 11) * 90,
// wait 30min for the next 12 attempts
i if (24..36).contains(&i) => 60 * 30,
// wait 60min for the next 12 attempts
i if (36..48).contains(&i) => 60 * 60,
// linearly increase the backoff time after 48 attempts,
// adding 1h for each additional attempt
i if (1..10).contains(&i) => 10,
i if (10..15).contains(&i) => 90,
i if (15..25).contains(&i) => 60 * 2,
// linearly increase from 3min to 24min, adding 1.5min for each additional attempt
i if (25..40).contains(&i) => (i as u64 - 23) * 90,
// wait 30min for the next 5 attempts
i if (40..45).contains(&i) => 60 * 30,
// wait 60min for the next 5 attempts
i if (45..50).contains(&i) => 60 * 60,
// linearly increase the backoff time, adding 1h for each additional attempt
_ => {
let hour: u64 = 60 * 60;
// To be extra safe, `max` to make sure it's at least 1 hour.
let target = hour.max((num_retries - 47) as u64 * hour);
let two_hours: u64 = hour * 2;
// To be extra safe, `max` to make sure it's at least 2 hours.
let target = two_hours.max((num_retries - 49) as u64 * two_hours);
// Schedule it at some random point in the next 6 hours to
// avoid scheduling messages with the same # of retries
// at the exact same time and starving new messages.
Expand Down Expand Up @@ -711,3 +712,43 @@ impl MessageSubmissionMetrics {
.set(std::cmp::max(self.last_known_nonce.get(), msg.nonce as i64));
}
}

#[cfg(test)]
mod test {
use std::time::Duration;

use crate::msg::pending_message::PendingMessage;

/// Checks that the backoff returned for retries 0 through 50 never shrinks from one
/// retry to the next. A `None` backoff is treated as a zero-length wait.
#[test]
fn test_calculate_msg_backoff_non_decreasing() {
    // Running total of all waits so far; only consumed by the optional debug print below.
    let mut total_elapsed = Duration::ZERO;
    let mut previous = Duration::ZERO;

    // Intentionally stop at 50: beyond that the backoff has a random component, so
    // comparing successive values would make this test flaky.
    for retry in 0..=50 {
        let current = PendingMessage::calculate_msg_backoff(retry).unwrap_or_default();

        // Uncomment to show the impact of changes to the backoff duration:

        // println!(
        //     "Retry #{}: cumulative duration from beginning is {}, since last attempt is {}",
        //     retry,
        //     duration_fmt(&total_elapsed),
        //     duration_fmt(&current)
        // );
        total_elapsed += current;

        assert!(current >= previous);
        previous = current;
    }
}

/// Formats a duration as `hours:minutes:seconds`.
///
/// Components are not zero-padded and hours are not capped at 24, so 90 seconds
/// renders as `0:1:30`. Sub-second precision is discarded.
// Only called from the commented-out debug `println!` in the backoff test.
#[allow(dead_code)]
fn duration_fmt(duration: &Duration) -> String {
    const SECS_PER_MINUTE: u64 = 60;
    const SECS_PER_HOUR: u64 = 60 * SECS_PER_MINUTE;

    let total = duration.as_secs();
    // Peel off whole hours first, then split the remainder into minutes and seconds.
    let (hours, remainder) = (total / SECS_PER_HOUR, total % SECS_PER_HOUR);
    let (minutes, seconds) = (remainder / SECS_PER_MINUTE, remainder % SECS_PER_MINUTE);

    format!("{}:{}:{}", hours, minutes, seconds)
}
}

0 comments on commit 214a8d7

Please sign in to comment.