From 2e10c4b55b3119cb5d0f6ffe8579262d718c9fb2 Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Mon, 13 Nov 2023 20:54:32 +0100 Subject: [PATCH 1/3] PVF: fix detection of unshare-and-change-root security capability One of our security features isn't working on a user's machine. Right now this feature should be optional, but the bug is that the node detects that the feature works, so the worker tries to apply it. The problem is that the detection and application are happening in different filesystems. Detection tries /tmp whereas application tries the database path. User reported back: > there are different permissions for the /tmp/ folder (root ownership) vs the .local folder (my user account) --- Cargo.lock | 1 + .../node/core/candidate-validation/src/lib.rs | 2 +- polkadot/node/core/pvf/Cargo.toml | 1 + .../node/core/pvf/common/src/worker/mod.rs | 6 +++--- polkadot/node/core/pvf/src/host.rs | 10 +++++++--- polkadot/node/core/pvf/src/security.rs | 18 ++++++++++++------ polkadot/node/core/pvf/tests/it/main.rs | 2 +- 7 files changed, 26 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2a091ce6817da..382406dd8c92d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12462,6 +12462,7 @@ dependencies = [ "polkadot-node-core-pvf-prepare-worker", "polkadot-node-metrics", "polkadot-node-primitives", + "polkadot-node-subsystem", "polkadot-parachain-primitives", "polkadot-primitives", "procfs", diff --git a/polkadot/node/core/candidate-validation/src/lib.rs b/polkadot/node/core/candidate-validation/src/lib.rs index 89ea02728840e..4232e5f1cdd88 100644 --- a/polkadot/node/core/candidate-validation/src/lib.rs +++ b/polkadot/node/core/candidate-validation/src/lib.rs @@ -150,7 +150,7 @@ async fn run( ), pvf_metrics, ) - .await; + .await?; ctx.spawn_blocking("pvf-validation-host", task.boxed())?; loop { diff --git a/polkadot/node/core/pvf/Cargo.toml b/polkadot/node/core/pvf/Cargo.toml index 430f7cd5e8ef1..3e72ca9e5326e 100644 --- a/polkadot/node/core/pvf/Cargo.toml +++ b/polkadot/node/core/pvf/Cargo.toml @@ -27,6 +27,7 @@ polkadot-core-primitives = { path = "../../../core-primitives" } polkadot-node-core-pvf-common = { path = "common" } polkadot-node-metrics = { path = "../../metrics" } polkadot-node-primitives = { path = "../../primitives" } +polkadot-node-subsystem = { path = "../../subsystem" } polkadot-primitives = { path = "../../../primitives" } sp-core = { path = "../../../../substrate/primitives/core" } diff --git a/polkadot/node/core/pvf/common/src/worker/mod.rs b/polkadot/node/core/pvf/common/src/worker/mod.rs index 274a2fc80397b..f6a67b98321a3 100644 --- a/polkadot/node/core/pvf/common/src/worker/mod.rs +++ b/polkadot/node/core/pvf/common/src/worker/mod.rs @@ -92,13 +92,13 @@ macro_rules! decl_worker_main { std::process::exit(status) }, "--check-can-unshare-user-namespace-and-change-root" => { + #[cfg(target_os = "linux")] + let cache_path_tempdir = std::path::Path::new(&args[2]); #[cfg(target_os = "linux")] let status = if let Err(err) = security::unshare_user_namespace_and_change_root( $crate::worker::WorkerKind::CheckPivotRoot, worker_pid, - // We're not accessing any files, so we can try to pivot_root in the temp - // dir without conflicts with other processes. - &std::env::temp_dir(), + &cache_path_tempdir, ) { // Write the error to stderr, log it on the host-side. eprintln!("{}", err); diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index 7b383e8034a72..f893189ba2b10 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -35,6 +35,7 @@ use polkadot_node_core_pvf_common::{ error::{PrepareError, PrepareResult}, pvf::PvfPrepData, }; +use polkadot_node_subsystem::SubsystemResult; use polkadot_parachain_primitives::primitives::ValidationResult; use std::{ collections::HashMap, @@ -203,11 +204,14 @@ impl Config { /// The future should not return normally but if it does then that indicates an unrecoverable error. /// In that case all pending requests will be canceled, dropping the result senders and new ones /// will be rejected. -pub async fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Future) { +pub async fn start( + config: Config, + metrics: Metrics, +) -> SubsystemResult<(ValidationHost, impl Future)> { gum::debug!(target: LOG_TARGET, ?config, "starting PVF validation host"); // Run checks for supported security features once per host startup. Warn here if not enabled. - let security_status = security::check_security_status(&config).await; + let security_status = security::check_security_status(&config).await?; let (to_host_tx, to_host_rx) = mpsc::channel(10); @@ -273,7 +277,7 @@ pub async fn start(config: Config, metrics: Metrics) -> (ValidationHost, impl Fu }; }; - (validation_host, task) + Ok((validation_host, task)) } /// A mapping from an artifact ID which is in preparation state to the list of pending execution diff --git a/polkadot/node/core/pvf/src/security.rs b/polkadot/node/core/pvf/src/security.rs index 295dd7df94dd8..27c95cb666623 100644 --- a/polkadot/node/core/pvf/src/security.rs +++ b/polkadot/node/core/pvf/src/security.rs @@ -14,9 +14,9 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use crate::{Config, SecurityStatus, LOG_TARGET}; +use crate::{worker_intf::tmppath_in, Config, SecurityStatus, LOG_TARGET}; use futures::join; -use std::{fmt, path::Path}; +use std::{fmt, io, path::Path}; use tokio::{ fs::{File, OpenOptions}, io::{AsyncReadExt, AsyncSeekExt, SeekFrom}, @@ -27,14 +27,18 @@ const SECURE_MODE_ANNOUNCEMENT: &'static str = \nMore information: https://wiki.polkadot.network/docs/maintain-guides-secure-validator#secure-validator-mode"; /// Run checks for supported security features. -pub async fn check_security_status(config: &Config) -> SecurityStatus { - let Config { prepare_worker_program_path, .. } = config; +pub async fn check_security_status(config: &Config) -> io::Result { + let Config { prepare_worker_program_path, cache_path, .. } = config; // TODO: add check that syslog is available and that seccomp violations are logged? + let cache_dir_tempdir = tmppath_in("check-can-unshare", &cache_path).await?; let (landlock, seccomp, change_root) = join!( check_landlock(prepare_worker_program_path), check_seccomp(prepare_worker_program_path), - check_can_unshare_user_namespace_and_change_root(prepare_worker_program_path) + check_can_unshare_user_namespace_and_change_root( + prepare_worker_program_path, + &cache_dir_tempdir + ) ); let security_status = SecurityStatus { @@ -56,7 +60,7 @@ pub async fn check_security_status(config: &Config) -> SecurityStatus { ); } - security_status + Ok(security_status) } type SecureModeResult = std::result::Result<(), SecureModeError>; @@ -149,11 +153,13 @@ fn print_secure_mode_message(errs: Vec) -> bool { async fn check_can_unshare_user_namespace_and_change_root( #[cfg_attr(not(target_os = "linux"), allow(unused_variables))] prepare_worker_program_path: &Path, + #[cfg_attr(not(target_os = "linux"), allow(unused_variables))] cache_dir_tempdir: &Path, ) -> SecureModeResult { cfg_if::cfg_if! { if #[cfg(target_os = "linux")] { match tokio::process::Command::new(prepare_worker_program_path) .arg("--check-can-unshare-user-namespace-and-change-root") + .arg(cache_dir_tempdir) .output() .await { diff --git a/polkadot/node/core/pvf/tests/it/main.rs b/polkadot/node/core/pvf/tests/it/main.rs index f4fd7f802f5e9..801b60884fa36 100644 --- a/polkadot/node/core/pvf/tests/it/main.rs +++ b/polkadot/node/core/pvf/tests/it/main.rs @@ -61,7 +61,7 @@ impl TestHost { execute_worker_path, ); f(&mut config); - let (host, task) = start(config, Metrics::default()).await; + let (host, task) = start(config, Metrics::default()).await.unwrap(); let _ = tokio::task::spawn(task); Self { cache_dir, host: Mutex::new(host) } } From 6b553ee61000bc2801ded85d017d0c5a78c4ac90 Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Mon, 13 Nov 2023 21:18:52 +0100 Subject: [PATCH 2/3] Fix bench --- polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs b/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs index acd80526262c0..d0cefae6cdbf2 100644 --- a/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs +++ b/polkadot/node/core/pvf/benches/host_prepare_rococo_runtime.rs @@ -47,7 +47,7 @@ impl TestHost { execute_worker_path, ); f(&mut config); - let (host, task) = start(config, Metrics::default()).await; + let (host, task) = start(config, Metrics::default()).await.unwrap(); let _ = handle.spawn(task); Self { host: Mutex::new(host) } } From 6b92392136e7b8f5a1b5f29f891b632761f761aa Mon Sep 17 00:00:00 2001 From: "Marcin S." Date: Tue, 14 Nov 2023 13:51:27 +0100 Subject: [PATCH 3/3] Don't abort on error checking security features --- polkadot/node/core/pvf/src/host.rs | 2 +- polkadot/node/core/pvf/src/security.rs | 30 +++++++++++++++++--------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/polkadot/node/core/pvf/src/host.rs b/polkadot/node/core/pvf/src/host.rs index f893189ba2b10..5919b9ba32c91 100644 --- a/polkadot/node/core/pvf/src/host.rs +++ b/polkadot/node/core/pvf/src/host.rs @@ -211,7 +211,7 @@ pub async fn start( gum::debug!(target: LOG_TARGET, ?config, "starting PVF validation host"); // Run checks for supported security features once per host startup. Warn here if not enabled. - let security_status = security::check_security_status(&config).await?; + let security_status = security::check_security_status(&config).await; let (to_host_tx, to_host_rx) = mpsc::channel(10); diff --git a/polkadot/node/core/pvf/src/security.rs b/polkadot/node/core/pvf/src/security.rs index 27c95cb666623..0c0c5f401663f 100644 --- a/polkadot/node/core/pvf/src/security.rs +++ b/polkadot/node/core/pvf/src/security.rs @@ -14,9 +14,9 @@ // You should have received a copy of the GNU General Public License // along with Polkadot. If not, see . -use crate::{worker_intf::tmppath_in, Config, SecurityStatus, LOG_TARGET}; +use crate::{Config, SecurityStatus, LOG_TARGET}; use futures::join; -use std::{fmt, io, path::Path}; +use std::{fmt, path::Path}; use tokio::{ fs::{File, OpenOptions}, io::{AsyncReadExt, AsyncSeekExt, SeekFrom}, @@ -27,18 +27,19 @@ const SECURE_MODE_ANNOUNCEMENT: &'static str = \nMore information: https://wiki.polkadot.network/docs/maintain-guides-secure-validator#secure-validator-mode"; /// Run checks for supported security features. -pub async fn check_security_status(config: &Config) -> io::Result { +/// +/// # Return +/// +/// Returns the set of security features that we were able to enable. If an error occurs while +/// enabling a security feature we set the corresponding status to `false`. +pub async fn check_security_status(config: &Config) -> SecurityStatus { let Config { prepare_worker_program_path, cache_path, .. } = config; // TODO: add check that syslog is available and that seccomp violations are logged? - let cache_dir_tempdir = tmppath_in("check-can-unshare", &cache_path).await?; let (landlock, seccomp, change_root) = join!( check_landlock(prepare_worker_program_path), check_seccomp(prepare_worker_program_path), - check_can_unshare_user_namespace_and_change_root( - prepare_worker_program_path, - &cache_dir_tempdir - ) + check_can_unshare_user_namespace_and_change_root(prepare_worker_program_path, cache_path) ); let security_status = SecurityStatus { @@ -60,7 +61,7 @@ pub async fn check_security_status(config: &Config) -> io::Result; @@ -153,10 +154,19 @@ fn print_secure_mode_message(errs: Vec) -> bool { async fn check_can_unshare_user_namespace_and_change_root( #[cfg_attr(not(target_os = "linux"), allow(unused_variables))] prepare_worker_program_path: &Path, - #[cfg_attr(not(target_os = "linux"), allow(unused_variables))] cache_dir_tempdir: &Path, + #[cfg_attr(not(target_os = "linux"), allow(unused_variables))] cache_path: &Path, ) -> SecureModeResult { cfg_if::cfg_if! { if #[cfg(target_os = "linux")] { + let cache_dir_tempdir = + crate::worker_intf::tmppath_in("check-can-unshare", cache_path) + .await + .map_err( + |err| + SecureModeError::CannotUnshareUserNamespaceAndChangeRoot( + format!("could not create a temporary directory in {:?}: {}", cache_path, err) + ) + )?; match tokio::process::Command::new(prepare_worker_program_path) .arg("--check-can-unshare-user-namespace-and-change-root") .arg(cache_dir_tempdir)