Skip to content

Commit

Permalink
Add heartbeat interval as a config parameter (#476)
Browse files Browse the repository at this point in the history
  • Loading branch information
danielle-tfh authored Sep 30, 2024
1 parent 170b19e commit 402d565
Show file tree
Hide file tree
Showing 7 changed files with 26 additions and 5 deletions.
2 changes: 1 addition & 1 deletion deploy/stage/common-values-iris-mpc.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
image: "ghcr.io/worldcoin/iris-mpc:v0.8.16"
image: "ghcr.io/worldcoin/iris-mpc:v0.8.18"

environment: stage
replicaCount: 1
Expand Down
3 changes: 3 additions & 0 deletions deploy/stage/mpc1-stage/values-iris-mpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ env:

- name: SMPC__PROCESSING_TIMEOUT_SECS
value: "60"

- name: SMPC__HEARTBEAT_INTERVAL_SECS
value: "5"

- name: SMPC__PATH
value: "/data/"
Expand Down
3 changes: 3 additions & 0 deletions deploy/stage/mpc2-stage/values-iris-mpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,9 @@ env:

- name: SMPC__PROCESSING_TIMEOUT_SECS
value: "60"

- name: SMPC__HEARTBEAT_INTERVAL_SECS
value: "5"

- name: SMPC__PATH
value: "/data/"
Expand Down
3 changes: 3 additions & 0 deletions deploy/stage/mpc3-stage/values-iris-mpc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ env:
- name: SMPC__PROCESSING_TIMEOUT_SECS
value: "60"

- name: SMPC__HEARTBEAT_INTERVAL_SECS
value: "5"

- name: SMPC__PATH
value: "/data/"

Expand Down
7 changes: 7 additions & 0 deletions iris-mpc-common/src/config/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ pub struct Config {

#[serde(default = "default_max_batch_size")]
pub max_batch_size: usize,

#[serde(default = "default_heartbeat_interval_secs")]
pub heartbeat_interval_secs: u64,
}

fn default_processing_timeout_secs() -> u64 {
Expand All @@ -70,6 +73,10 @@ fn default_max_batch_size() -> usize {
64
}

fn default_heartbeat_interval_secs() -> u64 {
30
}

impl Config {
pub fn load_config(prefix: &str) -> eyre::Result<Config> {
let settings = config::Config::builder();
Expand Down
7 changes: 4 additions & 3 deletions iris-mpc-gpu/src/server/heartbeat_nccl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ use tokio::{
task::{spawn_blocking, JoinHandle},
time::timeout,
};
const HEARBEAT_INTERVAL: Duration = Duration::from_secs(5);

pub async fn start_heartbeat(
party_id: usize,
main_tx: oneshot::Sender<eyre::Result<()>>,
heartbeat_interval_secs: u64,
) -> eyre::Result<()> {
let (tx, mut rx) = mpsc::channel(1);
let heartbeat_interval = Duration::from_secs(heartbeat_interval_secs);

let heartbeat_handle: JoinHandle<eyre::Result<()>> = spawn_blocking(move || {
let device_manager = Arc::new(DeviceManager::init_with_streams());
Expand Down Expand Up @@ -61,7 +62,7 @@ pub async fn start_heartbeat(
Ok(())
}())?;
}
std::thread::sleep(HEARBEAT_INTERVAL);
std::thread::sleep(heartbeat_interval);
counter += 1;
}
});
Expand All @@ -74,7 +75,7 @@ pub async fn start_heartbeat(
match timeout(timeout_interval, rx.recv()).await {
// The first heartbeat might take a while due to retries. However, after the connection
// is established, we switch to the normal heartbeat interval.
Ok(Some(Ok(_))) => timeout_interval = 2 * HEARBEAT_INTERVAL,
Ok(Some(Ok(_))) => timeout_interval = 2 * heartbeat_interval,
Ok(None) => {
tracing::error!("Heartbeat: Channel closed.");
break;
Expand Down
6 changes: 5 additions & 1 deletion iris-mpc/src/bin/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,11 @@ async fn server_main(config: Config) -> eyre::Result<()> {
let mut background_tasks = TaskMonitor::new();

let (tx, rx) = oneshot::channel();
let _heartbeat = background_tasks.spawn(start_heartbeat(config.party_id, tx));
let _heartbeat = background_tasks.spawn(start_heartbeat(
config.party_id,
tx,
config.heartbeat_interval_secs,
));

background_tasks.check_tasks();
tracing::info!("Heartbeat starting...");
Expand Down

0 comments on commit 402d565

Please sign in to comment.