Skip to content

Commit

Permalink
fix(libcontainer) no_pivot args is not used (#2923)
Browse files Browse the repository at this point in the history
* Support setting no_pivot_root for create and run command

Signed-off-by: Vanient <[email protected]>

* fix: mount move before choot

Move the rootfs to the root of the host filesystem before chrooting,
this is equivalent to pivot_root, if don't move mount first, we will
not see the new rootfs when exec into the container

Signed-off-by: xujihui1985 <[email protected]>

* fix(chroot): ensure mount occurs before chroot to mimic pivot_root behavior

Move the mount operation to occur before calling chroot to better simulate the effect of pivot_root.
Add a check to confirm if the current process is running inside an isolated mount namespace, ensuring proper mount handling.

Signed-off-by: xujihui1985 <[email protected]>

* implement intergration test for no-pivot

Signed-off-by: xujihui1985 <[email protected]>

* fix: add comments to no-pivot related code

Signed-off-by: xujihui1985 <[email protected]>

* fix(lint): fix format

Signed-off-by: xujihui1985 <[email protected]>

---------

Signed-off-by: Vanient <[email protected]>
Signed-off-by: xujihui1985 <[email protected]>
Co-authored-by: Vanient <[email protected]>
  • Loading branch information
xujihui1985 and Vanient authored Oct 29, 2024
1 parent 6cee446 commit d071596
Show file tree
Hide file tree
Showing 16 changed files with 304 additions and 21 deletions.
3 changes: 3 additions & 0 deletions crates/libcontainer/src/container/builder_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ pub(super) struct ContainerBuilderImpl {
pub detached: bool,
/// Default executes the specified execution of a generic command
pub executor: Box<dyn Executor>,
/// If do not use pivot root to jail process inside rootfs
pub no_pivot: bool,
}

impl ContainerBuilderImpl {
Expand Down Expand Up @@ -154,6 +156,7 @@ impl ContainerBuilderImpl {
cgroup_config,
detached: self.detached,
executor: self.executor.clone(),
no_pivot: self.no_pivot,
};

let (init_pid, need_to_clean_up_intel_rdt_dir) =
Expand Down
8 changes: 8 additions & 0 deletions crates/libcontainer/src/container/init_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub struct InitContainerBuilder {
bundle: PathBuf,
use_systemd: bool,
detached: bool,
no_pivot: bool,
}

impl InitContainerBuilder {
Expand All @@ -31,6 +32,7 @@ impl InitContainerBuilder {
bundle,
use_systemd: true,
detached: true,
no_pivot: false,
}
}

Expand All @@ -45,6 +47,11 @@ impl InitContainerBuilder {
self
}

pub fn with_no_pivot(mut self, no_pivot: bool) -> Self {
self.no_pivot = no_pivot;
self
}

/// Creates a new container
pub fn build(self) -> Result<Container, LibcontainerError> {
let spec = self.load_spec()?;
Expand Down Expand Up @@ -95,6 +102,7 @@ impl InitContainerBuilder {
preserve_fds: self.base.preserve_fds,
detached: self.detached,
executor: self.base.executor,
no_pivot: self.no_pivot,
};

builder_impl.create()?;
Expand Down
1 change: 1 addition & 0 deletions crates/libcontainer/src/container/tenant_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,7 @@ impl TenantContainerBuilder {
preserve_fds: self.base.preserve_fds,
detached: self.detached,
executor: self.base.executor,
no_pivot: false,
};

let pid = builder_impl.create()?;
Expand Down
2 changes: 2 additions & 0 deletions crates/libcontainer/src/process/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,4 +42,6 @@ pub struct ContainerArgs {
pub detached: bool,
/// Manage the functions that actually run on the container
pub executor: Box<dyn Executor>,
/// If do not use pivot root to jail process inside rootfs
pub no_pivot: bool,
}
85 changes: 72 additions & 13 deletions crates/libcontainer/src/process/container_init_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use std::path::{Path, PathBuf};
use std::{env, fs, mem};

use nc;
use nix::mount::MsFlags;
use nix::mount::{MntFlags, MsFlags};
use nix::sched::CloneFlags;
use nix::sys::stat::Mode;
use nix::unistd::{self, setsid, Gid, Uid};
Expand Down Expand Up @@ -270,6 +270,76 @@ fn reopen_dev_null() -> Result<()> {
Ok(())
}

// umount or hide the target path. If the target path is mounted
// try to unmount it first if the unmount operation fails with EINVAL
// then mount a tmpfs with size 0k to hide the target path.
fn unmount_or_hide(syscall: &dyn Syscall, target: impl AsRef<Path>) -> Result<()> {
let target_path = target.as_ref();
match syscall.umount2(target_path, MntFlags::MNT_DETACH) {
Ok(_) => Ok(()),
Err(SyscallError::Nix(nix::errno::Errno::EINVAL)) => syscall
.mount(
None,
target_path,
Some("tmpfs"),
MsFlags::MS_RDONLY,
Some("size=0k"),
)
.map_err(InitProcessError::SyscallOther),
Err(err) => Err(InitProcessError::SyscallOther(err)),
}
}

fn move_root(syscall: &dyn Syscall, rootfs: &Path) -> Result<()> {
unistd::chdir(rootfs).map_err(InitProcessError::NixOther)?;
// umount /sys and /proc if they are mounted, the purpose is to
// unmount or hide the /sys and /proc filesystems before the process changes its
// root to the new rootfs. thus ensure that the /sys and /proc filesystems are not
// accessible in the new rootfs. the logic is borrowed from crun
// https://github.com/containers/crun/blob/53cd1c1c697d7351d0cad23708d29bf4a7980a3a/src/libcrun/linux.c#L2780
unmount_or_hide(syscall, "/sys")?;
unmount_or_hide(syscall, "/proc")?;
syscall
.mount(Some(rootfs), Path::new("/"), None, MsFlags::MS_MOVE, None)
.map_err(|err| {
tracing::error!(?err, ?rootfs, "failed to mount ms_move");
InitProcessError::SyscallOther(err)
})?;

syscall.chroot(Path::new(".")).map_err(|err| {
tracing::error!(?err, ?rootfs, "failed to chroot");
InitProcessError::SyscallOther(err)
})?;

unistd::chdir("/").map_err(InitProcessError::NixOther)?;

Ok(())
}

fn do_pivot_root(
syscall: &dyn Syscall,
namespaces: &Namespaces,
no_pivot: bool,
rootfs: impl AsRef<Path>,
) -> Result<()> {
let rootfs_path = rootfs.as_ref();

let handle_error = |err: SyscallError, msg: &str| -> InitProcessError {
tracing::error!(?err, ?rootfs_path, msg);
InitProcessError::SyscallOther(err)
};

match namespaces.get(LinuxNamespaceType::Mount)? {
Some(_) if no_pivot => move_root(syscall, rootfs_path),
Some(_) => syscall
.pivot_rootfs(rootfs.as_ref())
.map_err(|err| handle_error(err, "failed to pivot root")),
None => syscall
.chroot(rootfs_path)
.map_err(|err| handle_error(err, "failed to chroot")),
}
}

// Some variables are unused in the case where libseccomp feature is not enabled.
#[allow(unused_variables)]
pub fn container_init_process(
Expand Down Expand Up @@ -343,18 +413,7 @@ pub fn container_init_process(
// we use pivot_root, but if we are on the host mount namespace, we will
// use simple chroot. Scary things will happen if you try to pivot_root
// in the host mount namespace...
if namespaces.get(LinuxNamespaceType::Mount)?.is_some() {
// change the root of filesystem of the process to the rootfs
syscall.pivot_rootfs(rootfs_path).map_err(|err| {
tracing::error!(?err, ?rootfs_path, "failed to pivot root");
InitProcessError::SyscallOther(err)
})?;
} else {
syscall.chroot(rootfs_path).map_err(|err| {
tracing::error!(?err, ?rootfs_path, "failed to chroot");
InitProcessError::SyscallOther(err)
})?;
}
do_pivot_root(syscall.as_ref(), &namespaces, args.no_pivot, rootfs_path)?;

// As we have changed the root mount, from here on
// logs are no longer visible in journalctl
Expand Down
5 changes: 5 additions & 0 deletions crates/libcontainer/src/syscall/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -574,6 +574,11 @@ impl Syscall for LinuxSyscall {
}?;
Ok(())
}

fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()> {
umount2(target, flags)?;
Ok(())
}
}

#[cfg(test)]
Expand Down
3 changes: 2 additions & 1 deletion crates/libcontainer/src/syscall/syscall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ use std::sync::Arc;

use caps::{CapSet, CapsHashSet};
use libc;
use nix::mount::MsFlags;
use nix::mount::{MntFlags, MsFlags};
use nix::sched::CloneFlags;
use nix::sys::stat::{Mode, SFlag};
use nix::unistd::{Gid, Uid};
Expand Down Expand Up @@ -54,6 +54,7 @@ pub trait Syscall {
size: libc::size_t,
) -> Result<()>;
fn set_io_priority(&self, class: i64, priority: i64) -> Result<()>;
fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()>;
}

#[derive(Clone, Copy)]
Expand Down
28 changes: 27 additions & 1 deletion crates/libcontainer/src/syscall/test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ use std::path::{Path, PathBuf};
use std::sync::Arc;

use caps::{CapSet, CapsHashSet};
use nix::mount::MsFlags;
use nix::mount::{MntFlags, MsFlags};
use nix::sched::CloneFlags;
use nix::sys::stat::{Mode, SFlag};
use nix::unistd::{Gid, Uid};
Expand Down Expand Up @@ -44,6 +44,12 @@ pub struct IoPriorityArgs {
pub priority: i64,
}

#[derive(Clone, PartialEq, Eq, Debug)]
pub struct UMount2Args {
pub target: PathBuf,
pub flags: MntFlags,
}

#[derive(Default)]
struct Mock {
values: Vec<Box<dyn Any>>,
Expand All @@ -64,6 +70,7 @@ pub enum ArgName {
Groups,
Capability,
IoPriority,
UMount2,
}

impl ArgName {
Expand Down Expand Up @@ -259,6 +266,16 @@ impl Syscall for TestHelperSyscall {
Box::new(IoPriorityArgs { class, priority }),
)
}

fn umount2(&self, target: &Path, flags: MntFlags) -> Result<()> {
self.mocks.act(
ArgName::UMount2,
Box::new(UMount2Args {
target: target.to_owned(),
flags,
}),
)
}
}

impl TestHelperSyscall {
Expand Down Expand Up @@ -369,4 +386,13 @@ impl TestHelperSyscall {
.map(|x| x.downcast_ref::<IoPriorityArgs>().unwrap().clone())
.collect::<Vec<IoPriorityArgs>>()
}

pub fn get_umount_args(&self) -> Vec<UMount2Args> {
self.mocks
.fetch(ArgName::UMount2)
.values
.iter()
.map(|x| x.downcast_ref::<UMount2Args>().unwrap().clone())
.collect::<Vec<UMount2Args>>()
}
}
1 change: 1 addition & 0 deletions crates/youki/src/commands/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ pub fn create(args: Create, root_path: PathBuf, systemd_cgroup: bool) -> Result<
.as_init(&args.bundle)
.with_systemd(systemd_cgroup)
.with_detach(true)
.with_no_pivot(args.no_pivot)
.build()?;

Ok(())
Expand Down
1 change: 1 addition & 0 deletions crates/youki/src/commands/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub fn run(args: Run, root_path: PathBuf, systemd_cgroup: bool) -> Result<i32> {
.as_init(&args.bundle)
.with_systemd(systemd_cgroup)
.with_detach(args.detach)
.with_no_pivot(args.no_pivot)
.build()?;

container
Expand Down
3 changes: 3 additions & 0 deletions tests/contest/contest/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ use crate::tests::io_priority::get_io_priority_test;
use crate::tests::lifecycle::{ContainerCreate, ContainerLifecycle};
use crate::tests::linux_ns_itype::get_ns_itype_tests;
use crate::tests::mounts_recursive::get_mounts_recursive_test;
use crate::tests::no_pivot::get_no_pivot_test;
use crate::tests::pidfile::get_pidfile_test;
use crate::tests::readonly_paths::get_ro_paths_test;
use crate::tests::scheduler::get_scheduler_test;
Expand Down Expand Up @@ -113,6 +114,7 @@ fn main() -> Result<()> {
let scheduler = get_scheduler_test();
let io_priority_test = get_io_priority_test();
let devices = get_devices_test();
let no_pivot = get_no_pivot_test();

tm.add_test_group(Box::new(cl));
tm.add_test_group(Box::new(cc));
Expand All @@ -136,6 +138,7 @@ fn main() -> Result<()> {
tm.add_test_group(Box::new(sysctl));
tm.add_test_group(Box::new(scheduler));
tm.add_test_group(Box::new(devices));
tm.add_test_group(Box::new(no_pivot));

tm.add_test_group(Box::new(io_priority_test));
tm.add_cleanup(Box::new(cgroups::cleanup_v1));
Expand Down
1 change: 1 addition & 0 deletions tests/contest/contest/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ pub mod io_priority;
pub mod lifecycle;
pub mod linux_ns_itype;
pub mod mounts_recursive;
pub mod no_pivot;
pub mod pidfile;
pub mod readonly_paths;
pub mod scheduler;
Expand Down
29 changes: 29 additions & 0 deletions tests/contest/contest/src/tests/no_pivot/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
use anyhow::{Context, Result};
use oci_spec::runtime::{ProcessBuilder, Spec, SpecBuilder};
use test_framework::{test_result, Test, TestGroup, TestResult};

use crate::utils::test_utils::test_inside_container_with_no_pivot;

fn create_spec() -> Result<Spec> {
SpecBuilder::default()
.process(
ProcessBuilder::default()
.args(vec!["runtimetest".to_string(), "no_pivot".to_string()])
.build()?,
)
.build()
.context("failed to create spec")
}

fn no_pivot_test() -> TestResult {
let spec = test_result!(create_spec());
test_inside_container_with_no_pivot(spec, &|_| Ok(()))
}

pub fn get_no_pivot_test() -> TestGroup {
let mut test_group = TestGroup::new("no_pivot");
let no_pivot_test = Test::new("no_pivot_test", Box::new(no_pivot_test));
test_group.add(vec![Box::new(no_pivot_test)]);

test_group
}
Loading

0 comments on commit d071596

Please sign in to comment.