From 4d47cf6bf3431f0ef0a697aa0827592bb38f8c26 Mon Sep 17 00:00:00 2001
From: Akosh Farkash
Date: Thu, 29 Feb 2024 20:04:37 +0000
Subject: [PATCH] ENG-578: Wait in a loop for the APIs to start

---
 .../testing/materializer/src/docker/node.rs   | 13 +++-
 fendermint/testing/materializer/src/lib.rs    | 11 +++
 .../testing/materializer/tests/docker.rs      | 72 ++++++++++++++++---
 3 files changed, 85 insertions(+), 11 deletions(-)

diff --git a/fendermint/testing/materializer/src/docker/node.rs b/fendermint/testing/materializer/src/docker/node.rs
index 3bfbc94be..1863171cd 100644
--- a/fendermint/testing/materializer/src/docker/node.rs
+++ b/fendermint/testing/materializer/src/docker/node.rs
@@ -5,6 +5,7 @@ use std::{
     collections::BTreeMap,
     os::unix::fs::MetadataExt,
     path::{Path, PathBuf},
+    str::FromStr,
 };
 
 use anyhow::{anyhow, bail, Context};
@@ -21,7 +22,7 @@ use crate::{
     env_vars,
     materializer::{NodeConfig, TargetConfig},
     materials::export_file,
-    HasEthApi, NodeName, ResourceHash,
+    HasCometBftApi, HasEthApi, NodeName, ResourceHash,
 };
 
 // TODO: Add these to the materializer.
@@ -475,6 +476,16 @@ impl HasEthApi for DockerNode {
     }
 }
 
+impl HasCometBftApi for DockerNode {
+    fn cometbft_http_endpoint(&self) -> tendermint_rpc::Url {
+        tendermint_rpc::Url::from_str(&format!(
+            "http://127.0.0.1:{}",
+            self.port_range.cometbft_rpc_host_port()
+        ))
+        .unwrap()
+    }
+}
+
 /// Create a container name from a node name and a logical container name, e.g. "cometbft"
 /// in a way that we can use it as a hostname without being too long.
 ///
diff --git a/fendermint/testing/materializer/src/lib.rs b/fendermint/testing/materializer/src/lib.rs
index 021425810..ccddc91ae 100644
--- a/fendermint/testing/materializer/src/lib.rs
+++ b/fendermint/testing/materializer/src/lib.rs
@@ -323,6 +323,17 @@ pub trait HasEthApi {
     }
 }
 
+pub trait HasCometBftApi {
+    /// URL of the HTTP endpoint.
+    fn cometbft_http_endpoint(&self) -> tendermint_rpc::Url;
+
+    fn cometbft_http_provider(&self) -> anyhow::Result<tendermint_rpc::HttpClient> {
+        Ok(tendermint_rpc::HttpClient::new(
+            self.cometbft_http_endpoint(),
+        )?)
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::path::PathBuf;
diff --git a/fendermint/testing/materializer/tests/docker.rs b/fendermint/testing/materializer/tests/docker.rs
index 17a8aa89e..0bbac4c9a 100644
--- a/fendermint/testing/materializer/tests/docker.rs
+++ b/fendermint/testing/materializer/tests/docker.rs
@@ -6,23 +6,33 @@
 //!
 //! `cargo test -p fendermint_materializer --test docker -- --nocapture`
 
-use std::{env::current_dir, path::PathBuf, pin::Pin, time::Duration};
+use std::{
+    collections::BTreeSet,
+    env::current_dir,
+    path::PathBuf,
+    pin::Pin,
+    time::{Duration, Instant},
+};
 
-use anyhow::Context;
+use anyhow::{anyhow, Context};
+use ethers::providers::Middleware;
 use fendermint_materializer::{
     docker::{DockerMaterializer, DockerMaterials},
     manifest::Manifest,
     testnet::Testnet,
     validation::validate_manifest,
-    TestnetName,
+    HasCometBftApi, HasEthApi, TestnetName,
 };
 use futures::Future;
 use lazy_static::lazy_static;
+use tendermint_rpc::Client;
+
+pub type DockerTestnet = Testnet<DockerMaterials, DockerMaterializer>;
 
 lazy_static! {
     static ref CI_PROFILE: bool = std::env::var("PROFILE").unwrap_or_default() == "ci";
-    static ref STARTUP_WAIT_SECS: u64 = if *CI_PROFILE { 20 } else { 15 };
-    static ref TEARDOWN_WAIT_SECS: u64 = 5;
+    static ref STARTUP_TIMEOUT: Duration = Duration::from_secs(60);
+    static ref TEARDOWN_TIMEOUT: Duration = Duration::from_secs(5);
     static ref PRINT_LOGS_ON_ERROR: bool = *CI_PROFILE;
 }
 
@@ -62,7 +72,7 @@ where
     F: for<'a> FnOnce(
         &Manifest,
         &mut DockerMaterializer,
-        &'a mut Testnet<DockerMaterials, DockerMaterializer>,
+        &'a mut DockerTestnet,
     ) -> Pin<Box<dyn Future<Output = anyhow::Result<()>> + 'a>>,
 {
     let testnet_name = TestnetName::new(
@@ -92,10 +102,13 @@ where
         .await
         .context("failed to set up testnet")?;
 
-    // Allow time for things to consolidate and blocks to be created.
-    tokio::time::sleep(Duration::from_secs(*STARTUP_WAIT_SECS)).await;
+    let started = wait_for_startup(&testnet).await?;
 
-    let res = f(&manifest, &mut materializer, &mut testnet).await;
+    let res = if started {
+        f(&manifest, &mut materializer, &mut testnet).await
+    } else {
+        Err(anyhow!("the startup sequence timed out"))
+    };
 
     // Print all logs on failure.
     // Some might be available in logs in the files which are left behind,
@@ -123,11 +136,50 @@ where
     // otherwise the system shuts down too quick, but
     // at least we can inspect the containers.
     // If they don't all get dropped, `docker system prune` helps.
-    tokio::time::sleep(Duration::from_secs(*TEARDOWN_WAIT_SECS)).await;
+    tokio::time::sleep(*TEARDOWN_TIMEOUT).await;
 
     res
 }
 
+/// Allow time for things to consolidate and APIs to start.
+async fn wait_for_startup(testnet: &DockerTestnet) -> anyhow::Result<bool> {
+    let start = Instant::now();
+    let mut started = BTreeSet::new();
+
+    'startup: loop {
+        if start.elapsed() > *STARTUP_TIMEOUT {
+            return Ok(false);
+        }
+        tokio::time::sleep(Duration::from_secs(5)).await;
+
+        for (name, dnode) in testnet.nodes() {
+            if started.contains(name) {
+                continue;
+            }
+
+            let client = dnode.cometbft_http_provider()?;
+
+            if let Err(e) = client.abci_info().await {
+                eprintln!("CometBFT on {name} still fails: {e}");
+                continue 'startup;
+            }
+
+            if let Some(client) = dnode.ethapi_http_provider()? {
+                if let Err(e) = client.get_chainid().await {
+                    eprintln!("EthAPI on {name} still fails: {e}");
+                    continue 'startup;
+                }
+            }
+
+            eprintln!("APIs on {name} started");
+            started.insert(name.clone());
+        }
+
+        // All of them succeeded.
+        return Ok(true);
+    }
+}
+
 // Run these tests serially because they share a common `materializer-state.json` file with the port mappings.
 // Unfortunately the `#[serial]` macro can only be applied to module blocks, not this.
 mod docker_tests;