diff --git a/core/upgrader/impl/src/lib.rs b/core/upgrader/impl/src/lib.rs index e4fd1aeb1..2ce467bb8 100644 --- a/core/upgrader/impl/src/lib.rs +++ b/core/upgrader/impl/src/lib.rs @@ -1,16 +1,22 @@ +use crate::model::{DisasterRecovery, LogEntry, State}; +use crate::services::set_logs; use crate::upgrade::{ CheckController, Upgrade, Upgrader, WithAuthorization, WithBackground, WithLogs, WithStart, WithStop, }; use candid::Principal; -use ic_cdk::{api::management_canister::main::CanisterInstallMode, init, update}; +use ic_cdk::api::stable::stable_write; +use ic_cdk::{ + api::management_canister::main::CanisterInstallMode, init, post_upgrade, trap, update, +}; use ic_stable_structures::{ memory_manager::{MemoryId, MemoryManager, VirtualMemory}, - DefaultMemoryImpl, StableBTreeMap, + storable::Bound, + DefaultMemoryImpl, StableBTreeMap, Storable, }; use lazy_static::lazy_static; -use orbit_essentials::storable; -use std::{cell::RefCell, sync::Arc, thread::LocalKey}; +use orbit_essentials::types::Timestamp; +use std::{borrow::Cow, cell::RefCell, collections::BTreeMap, sync::Arc}; use upgrade::{UpgradeError, UpgradeParams}; use upgrader_api::{InitArg, TriggerUpgradeError}; @@ -29,45 +35,135 @@ pub mod utils; type Memory = VirtualMemory; type StableMap = StableBTreeMap; type StableValue = StableMap<(), T>; -type LocalRef = &'static LocalKey>; -const MEMORY_ID_TARGET_CANISTER_ID: u8 = 0; -const MEMORY_ID_DISASTER_RECOVERY: u8 = 1; -const MEMORY_ID_LOGS: u8 = 4; +/// Represents one mebibyte. +pub const MIB: u32 = 1 << 20; -thread_local! { - static MEMORY_MANAGER: RefCell> = - RefCell::new(MemoryManager::init(DefaultMemoryImpl::default())); -} +/// Canisters use 64KiB pages for Wasm memory, more details in the PR that introduced this constant: +/// - https://github.com/WebAssembly/design/pull/442#issuecomment-153203031 +pub const WASM_PAGE_SIZE: u32 = 65536; -#[storable] -pub struct StorablePrincipal(Principal); +/// The size of the stable memory bucket in WASM pages. 
+/// +/// We use a bucket size of 1MiB to ensure that the default memory allocated to the canister is as small as possible; +/// this is due to the fact that this canister uses several MemoryIds to manage the stable memory similarly to how +/// a database arranges data per table. +/// +/// Currently a bucket size of 1MiB limits the canister to 32GiB of stable memory, which is more than enough for the +/// current use case; however, if the canister needs more memory in the future, `ic-stable-structures` will need to be +/// updated to support storing more buckets in a backwards compatible way. +pub const STABLE_MEMORY_BUCKET_SIZE: u16 = (MIB / WASM_PAGE_SIZE) as u16; + +/// Current version of stable memory layout. +pub const STABLE_MEMORY_VERSION: u32 = 1; + +const MEMORY_ID_STATE: u8 = 0; +const MEMORY_ID_LOGS: u8 = 1; thread_local! { - static TARGET_CANISTER_ID: RefCell> = RefCell::new( + static MEMORY_MANAGER: RefCell> = + RefCell::new(MemoryManager::init_with_bucket_size(DefaultMemoryImpl::default(), STABLE_MEMORY_BUCKET_SIZE)); + static STATE: RefCell> = RefCell::new( StableValue::init( - MEMORY_MANAGER.with(|m| m.borrow().get(MemoryId::new(MEMORY_ID_TARGET_CANISTER_ID))), + MEMORY_MANAGER.with(|m| m.borrow().get(MemoryId::new(MEMORY_ID_STATE))), ) ); } +fn get_state() -> State { + STATE.with(|storage| storage.borrow().get(&()).unwrap_or_default()) +} + +fn set_state(state: State) { + STATE.with(|storage| storage.borrow_mut().insert((), state)); +} + +pub fn get_target_canister() -> Principal { + get_state().target_canister +} + +pub fn set_target_canister(target_canister: Principal) { + let mut state = get_state(); + state.target_canister = target_canister; + set_state(state); +} + +pub fn get_disaster_recovery() -> DisasterRecovery { + get_state().disaster_recovery +} + +pub fn set_disaster_recovery(value: DisasterRecovery) { + let mut state = get_state(); + state.disaster_recovery = value; + set_state(state); +} + #[init] fn init_fn(InitArg { 
target_canister }: InitArg) { - TARGET_CANISTER_ID.with(|id| { - let mut id = id.borrow_mut(); - id.insert((), StorablePrincipal(target_canister)); - }); + set_target_canister(target_canister); +} + +#[post_upgrade] +fn post_upgrade() { + pub struct RawBytes(pub Vec); + impl Storable for RawBytes { + fn to_bytes(&self) -> Cow<[u8]> { + trap("RawBytes should never be serialized") + } + + fn from_bytes(bytes: Cow<[u8]>) -> Self { + Self(bytes.to_vec()) + } + + const BOUND: Bound = Bound::Unbounded; + } + + const OLD_MEMORY_ID_TARGET_CANISTER_ID: u8 = 0; + const OLD_MEMORY_ID_DISASTER_RECOVERY: u8 = 1; + const OLD_MEMORY_ID_LOGS: u8 = 4; + + let old_memory_manager = MemoryManager::init(DefaultMemoryImpl::default()); + + // determine stable memory layout by trying to parse the target canister id from OLD_MEMORY_ID_TARGET_CANISTER_ID + let old_target_canister_bytes: StableValue = + StableValue::init(old_memory_manager.get(MemoryId::new(OLD_MEMORY_ID_TARGET_CANISTER_ID))); + let target_canister_bytes = old_target_canister_bytes + .get(&()) + .unwrap_or_else(|| trap("Could not determine stable memory layout.")); + if let Ok(target_canister) = serde_cbor::from_slice::(&target_canister_bytes.0) { + let old_disaster_recovery: StableValue = StableValue::init( + old_memory_manager.get(MemoryId::new(OLD_MEMORY_ID_DISASTER_RECOVERY)), + ); + let disaster_recovery: DisasterRecovery = + old_disaster_recovery.get(&()).unwrap_or_default(); + + let old_logs: StableBTreeMap = + StableBTreeMap::init(old_memory_manager.get(MemoryId::new(OLD_MEMORY_ID_LOGS))); + let logs: BTreeMap = old_logs.iter().collect(); + + // clear the magic header of stable structures to force their reinitialization + // https://github.com/dfinity/stable-structures/blob/69ed47f9b5001af67d650c714cd56ec3ee0ef2bb/src/memory_manager.rs#L254-L256 + stable_write(0, &[0; 3]); + + let state = State { + target_canister, + disaster_recovery, + stable_memory_version: STABLE_MEMORY_VERSION, + }; + set_state(state); + 
set_logs(logs); + } } lazy_static! { static ref UPGRADER: Box = { - let u = Upgrader::new(&TARGET_CANISTER_ID); - let u = WithStop(u, &TARGET_CANISTER_ID); - let u = WithStart(u, &TARGET_CANISTER_ID); + let u = Upgrader {}; + let u = WithStop(u); + let u = WithStart(u); let u = WithLogs(u, "upgrade".to_string()); - let u = WithBackground(Arc::new(u), &TARGET_CANISTER_ID); - let u = CheckController(u, &TARGET_CANISTER_ID); - let u = WithAuthorization(u, &TARGET_CANISTER_ID); + let u = WithBackground(Arc::new(u)); + let u = CheckController(u); + let u = WithAuthorization(u); let u = WithLogs(u, "trigger_upgrade".to_string()); Box::new(u) }; diff --git a/core/upgrader/impl/src/model/mod.rs b/core/upgrader/impl/src/model/mod.rs index 325391b68..0944f5eb4 100644 --- a/core/upgrader/impl/src/model/mod.rs +++ b/core/upgrader/impl/src/model/mod.rs @@ -3,3 +3,24 @@ mod logging; pub use disaster_recovery::*; pub use logging::*; + +use crate::STABLE_MEMORY_VERSION; +use candid::Principal; +use orbit_essentials::storable; + +#[storable] +pub struct State { + pub target_canister: Principal, + pub disaster_recovery: DisasterRecovery, + pub stable_memory_version: u32, +} + +impl Default for State { + fn default() -> Self { + Self { + target_canister: Principal::anonymous(), + disaster_recovery: Default::default(), + stable_memory_version: STABLE_MEMORY_VERSION, + } + } +} diff --git a/core/upgrader/impl/src/services/disaster_recovery.rs b/core/upgrader/impl/src/services/disaster_recovery.rs index 2ed93405c..54f198f10 100644 --- a/core/upgrader/impl/src/services/disaster_recovery.rs +++ b/core/upgrader/impl/src/services/disaster_recovery.rs @@ -1,27 +1,25 @@ -use std::{cell::RefCell, collections::HashMap, sync::Arc}; +use std::{collections::HashMap, sync::Arc}; use crate::{ errors::UpgraderApiError, + get_disaster_recovery, get_target_canister, model::{ Asset, DisasterRecoveryInProgressLog, DisasterRecoveryResultLog, DisasterRecoveryStartLog, LogEntryType, MultiAssetAccount, 
RequestDisasterRecoveryLog, SetAccountsAndAssetsLog, SetAccountsLog, SetCommitteeLog, }, services::LOGGER_SERVICE, + set_disaster_recovery, upgrader_ic_cdk::{api::time, spawn}, }; use candid::Principal; -use ic_stable_structures::memory_manager::MemoryId; use lazy_static::lazy_static; use orbit_essentials::{api::ServiceResult, utils::sha256_hash}; -use crate::{ - model::{ - Account, AdminUser, DisasterRecovery, DisasterRecoveryCommittee, InstallMode, - RecoveryEvaluationResult, RecoveryFailure, RecoveryResult, RecoveryStatus, - StationRecoveryRequest, - }, - StableValue, MEMORY_ID_DISASTER_RECOVERY, MEMORY_MANAGER, TARGET_CANISTER_ID, +use crate::model::{ + Account, AdminUser, DisasterRecovery, DisasterRecoveryCommittee, InstallMode, + RecoveryEvaluationResult, RecoveryFailure, RecoveryResult, RecoveryStatus, + StationRecoveryRequest, }; use super::{InstallCanister, LoggerService, INSTALL_CANISTER}; @@ -29,16 +27,6 @@ use super::{InstallCanister, LoggerService, INSTALL_CANISTER}; pub const DISASTER_RECOVERY_REQUEST_EXPIRATION_NS: u64 = 60 * 60 * 24 * 7 * 1_000_000_000; // 1 week pub const DISASTER_RECOVERY_IN_PROGESS_EXPIRATION_NS: u64 = 60 * 60 * 1_000_000_000; // 1 hour -thread_local! { - - static STORAGE: RefCell> = RefCell::new( - StableValue::init( - MEMORY_MANAGER.with(|m| m.borrow().get(MemoryId::new(MEMORY_ID_DISASTER_RECOVERY))), - ) - ); - -} - lazy_static! 
{ pub static ref DISASTER_RECOVERY_SERVICE: Arc = Arc::new(DisasterRecoveryService { @@ -83,11 +71,11 @@ pub struct DisasterRecoveryStorage {} impl DisasterRecoveryStorage { pub fn get(&self) -> DisasterRecovery { - STORAGE.with(|storage| storage.borrow().get(&()).unwrap_or_default()) + get_disaster_recovery() } fn set(&self, value: DisasterRecovery) { - STORAGE.with(|storage| storage.borrow_mut().insert((), value)); + set_disaster_recovery(value); } } @@ -282,15 +270,7 @@ impl DisasterRecoveryService { return; } - let Some(station_canister_id) = - TARGET_CANISTER_ID.with(|id| id.borrow().get(&()).map(|id| id.0)) - else { - value.last_recovery_result = Some(RecoveryResult::Failure(RecoveryFailure { - reason: "Station canister ID not set".to_string(), - })); - storage.set(value); - return; - }; + let station_canister_id = get_target_canister(); value.recovery_status = RecoveryStatus::InProgress { since: time() }; storage.set(value); @@ -415,7 +395,6 @@ mod tests { services::{ DisasterRecoveryService, DisasterRecoveryStorage, InstallCanister, LoggerService, }, - StorablePrincipal, TARGET_CANISTER_ID, }; #[derive(Default)] @@ -575,11 +554,6 @@ mod tests { #[tokio::test] async fn test_do_recovery() { - TARGET_CANISTER_ID.with(|id| { - id.borrow_mut() - .insert((), StorablePrincipal(Principal::anonymous())); - }); - let storage: DisasterRecoveryStorage = Default::default(); let logger = Arc::new(LoggerService::default()); let recovery_request = StationRecoveryRequest { @@ -655,51 +629,8 @@ mod tests { ); } - #[tokio::test] - async fn test_failing_do_recovery_with_no_target_canister_id() { - // setup: TARGET_CANISTER_ID is not set, so recovery should fail - - let storage: DisasterRecoveryStorage = Default::default(); - let logger = Arc::new(LoggerService::default()); - let recovery_request = StationRecoveryRequest { - user_id: [1; 16], - wasm_module: vec![1, 2, 3], - wasm_module_extra_chunks: None, - wasm_sha256: vec![4, 5, 6], - install_mode: InstallMode::Reinstall, - 
arg: vec![7, 8, 9], - arg_sha256: vec![10, 11, 12], - submitted_at: 0, - }; - - let installer = Arc::new(TestInstaller::default()); - - DisasterRecoveryService::do_recovery( - storage.clone(), - installer.clone(), - logger.clone(), - recovery_request.clone(), - ) - .await; - - assert!(matches!( - storage.get().last_recovery_result, - Some(RecoveryResult::Failure(_)) - )); - - assert!(matches!( - storage.get().recovery_status, - RecoveryStatus::Idle - )); - } - #[tokio::test] async fn test_failing_do_recovery_with_panicking_install() { - TARGET_CANISTER_ID.with(|id| { - id.borrow_mut() - .insert((), StorablePrincipal(Principal::anonymous())); - }); - let storage: DisasterRecoveryStorage = Default::default(); let logger = Arc::new(LoggerService::default()); let recovery_request = StationRecoveryRequest { diff --git a/core/upgrader/impl/src/services/logger.rs b/core/upgrader/impl/src/services/logger.rs index faf6f9c4d..c0daed3a7 100644 --- a/core/upgrader/impl/src/services/logger.rs +++ b/core/upgrader/impl/src/services/logger.rs @@ -1,6 +1,6 @@ -use std::{cell::RefCell, sync::Arc}; +use std::{cell::RefCell, collections::BTreeMap, sync::Arc}; -use ic_stable_structures::{memory_manager::MemoryId, BTreeMap}; +use ic_stable_structures::{memory_manager::MemoryId, StableBTreeMap}; use lazy_static::lazy_static; use orbit_essentials::types::Timestamp; @@ -14,13 +14,22 @@ pub const DEFAULT_GET_LOGS_LIMIT: u64 = 10; pub const MAX_LOG_ENTRIES: u64 = 25000; thread_local! { - static STORAGE: RefCell> = RefCell::new( - BTreeMap::init( + static STORAGE: RefCell> = RefCell::new( + StableBTreeMap::init( MEMORY_MANAGER.with(|m| m.borrow().get(MemoryId::new(MEMORY_ID_LOGS))), ) ); } +// only use this function for stable memory migration! +pub fn set_logs(logs: BTreeMap) { + STORAGE.with(|storage| { + for (timestamp, log) in logs { + storage.borrow_mut().insert(timestamp, log); + } + }); +} + lazy_static! 
{ pub static ref LOGGER_SERVICE: Arc = Arc::new(LoggerService::default()); } diff --git a/core/upgrader/impl/src/upgrade.rs b/core/upgrader/impl/src/upgrade.rs index 3e4ec301b..c2ad2445b 100644 --- a/core/upgrader/impl/src/upgrade.rs +++ b/core/upgrader/impl/src/upgrade.rs @@ -1,11 +1,10 @@ use crate::{ + get_target_canister, model::{LogEntryType, UpgradeResultLog}, services::LOGGER_SERVICE, - LocalRef, StableValue, StorablePrincipal, }; -use anyhow::{anyhow, Context}; +use anyhow::anyhow; use async_trait::async_trait; -use candid::Principal; use ic_cdk::api::management_canister::main::{ self as mgmt, CanisterIdRecord, CanisterInfoRequest, CanisterInstallMode, }; @@ -41,23 +40,12 @@ pub trait Upgrade: 'static + Sync + Send { } #[derive(Clone)] -pub struct Upgrader { - target: LocalRef>, -} - -impl Upgrader { - pub fn new(target: LocalRef>) -> Self { - Self { target } - } -} +pub struct Upgrader {} #[async_trait] impl Upgrade for Upgrader { async fn upgrade(&self, ps: UpgradeParams) -> Result<(), UpgradeError> { - let target_canister = self - .target - .with(|id| id.borrow().get(&()).context("canister id not set"))? 
- .0; + let target_canister = get_target_canister(); install_chunked_code( target_canister, @@ -71,17 +59,15 @@ impl Upgrade for Upgrader { } } -pub struct WithStop(pub T, pub LocalRef>); +pub struct WithStop(pub T); #[async_trait] impl Upgrade for WithStop { /// Perform an upgrade but ensure that the target canister is stopped first async fn upgrade(&self, ps: UpgradeParams) -> Result<(), UpgradeError> { - let id = self - .1 - .with(|id| id.borrow().get(&()).context("canister id not set"))?; + let id = get_target_canister(); - mgmt::stop_canister(CanisterIdRecord { canister_id: id.0 }) + mgmt::stop_canister(CanisterIdRecord { canister_id: id }) .await .map_err(|(_, err)| anyhow!("failed to stop canister: {err}"))?; @@ -89,7 +75,7 @@ impl Upgrade for WithStop { } } -pub struct WithStart(pub T, pub LocalRef>); +pub struct WithStart(pub T); #[async_trait] impl Upgrade for WithStart { @@ -98,11 +84,9 @@ impl Upgrade for WithStart { async fn upgrade(&self, ps: UpgradeParams) -> Result<(), UpgradeError> { let out = self.0.upgrade(ps).await; - let id = self - .1 - .with(|id| id.borrow().get(&()).context("canister id not set"))?; + let id = get_target_canister(); - mgmt::start_canister(CanisterIdRecord { canister_id: id.0 }) + mgmt::start_canister(CanisterIdRecord { canister_id: id }) .await .map_err(|(_, err)| anyhow!("failed to start canister: {err}"))?; @@ -110,7 +94,7 @@ impl Upgrade for WithStart { } } -pub struct WithBackground(pub Arc, pub LocalRef>); +pub struct WithBackground(pub Arc); #[async_trait] impl Upgrade for WithBackground { @@ -118,37 +102,32 @@ impl Upgrade for WithBackground { /// so that it is performed in a non-blocking manner async fn upgrade(&self, ps: UpgradeParams) -> Result<(), UpgradeError> { let u = self.0.clone(); - let target_canister_id: Option = - self.1.with(|p| p.borrow().get(&()).map(|sp| sp.0)); + let target_canister_id = get_target_canister(); ic_cdk::spawn(async move { let res = u.upgrade(ps).await; - // Notify the target canister 
about a failed upgrade unless the call is unauthorized - // (we don't want to spam the target canister with such errors). - if let Some(target_canister_id) = target_canister_id { - if let Err(ref err) = res { - let err = match err { - UpgradeError::UnexpectedError(err) => Some(err.to_string()), - UpgradeError::NotController => Some( - "The upgrader canister is not a controller of the target canister" - .to_string(), - ), - UpgradeError::Unauthorized => None, - }; - if let Some(err) = err { - let notify_failed_station_upgrade_input = - NotifyFailedStationUpgradeInput { reason: err }; - let notify_res = call::<_, (ApiResult<()>,)>( - target_canister_id, - "notify_failed_station_upgrade", - (notify_failed_station_upgrade_input,), - ) - .await - .map(|r| r.0); - // Log an error if the notification can't be made. - if let Err(e) = notify_res { - print(format!("notify_failed_station_upgrade failed: {:?}", e)); - } + if let Err(ref err) = res { + let err = match err { + UpgradeError::UnexpectedError(err) => Some(err.to_string()), + UpgradeError::NotController => Some( + "The upgrader canister is not a controller of the target canister" + .to_string(), + ), + UpgradeError::Unauthorized => None, + }; + if let Some(err) = err { + let notify_failed_station_upgrade_input = + NotifyFailedStationUpgradeInput { reason: err }; + let notify_res = call::<_, (ApiResult<()>,)>( + target_canister_id, + "notify_failed_station_upgrade", + (notify_failed_station_upgrade_input,), + ) + .await + .map(|r| r.0); + // Log an error if the notification can't be made. 
+ if let Err(e) = notify_res { + print(format!("notify_failed_station_upgrade failed: {:?}", e)); } } } @@ -158,16 +137,14 @@ impl Upgrade for WithBackground { } } -pub struct WithAuthorization(pub T, pub LocalRef>); +pub struct WithAuthorization(pub T); #[async_trait] impl Upgrade for WithAuthorization { async fn upgrade(&self, ps: UpgradeParams) -> Result<(), UpgradeError> { - let id = self - .1 - .with(|id| id.borrow().get(&()).context("canister id not set"))?; + let id = get_target_canister(); - if !ic_cdk::caller().eq(&id.0) { + if !ic_cdk::caller().eq(&id) { return Err(UpgradeError::Unauthorized); } @@ -175,17 +152,15 @@ impl Upgrade for WithAuthorization { } } -pub struct CheckController(pub T, pub LocalRef>); +pub struct CheckController(pub T); #[async_trait] impl Upgrade for CheckController { async fn upgrade(&self, ps: UpgradeParams) -> Result<(), UpgradeError> { - let id = self - .1 - .with(|id| id.borrow().get(&()).context("canister id not set"))?; + let id = get_target_canister(); let (resp,) = mgmt::canister_info(CanisterInfoRequest { - canister_id: id.0, + canister_id: id, num_requested_changes: None, }) .await