Skip to content

Commit

Permalink
dbg
Browse files Browse the repository at this point in the history
Signed-off-by: utam0k <[email protected]>
  • Loading branch information
utam0k committed May 21, 2023
1 parent c5503d5 commit d43ceda
Show file tree
Hide file tree
Showing 3 changed files with 260 additions and 34 deletions.
155 changes: 146 additions & 9 deletions crates/libcontainer/src/capabilities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,38 +129,175 @@ pub fn reset_effective<S: Syscall + ?Sized>(syscall: &S) -> Result<(), SyscallEr
Ok(())
}

/// Drop any extra granted capabilities, and reset to defaults which are in oci specification
pub fn drop_privileges<S: Syscall + ?Sized>(
pub fn apply_bounding<S: Syscall + ?Sized>(
cs: &LinuxCapabilities,
syscall: &S,
) -> Result<(), SyscallError> {
tracing::debug!("dropping bounding capabilities to {:?}", cs.bounding());
tracing::debug!("applying bounding capabilities to {:?}", cs.bounding());
if let Some(bounding) = cs.bounding() {
syscall.set_capability(CapSet::Bounding, &to_set(bounding))?;
}

Ok(())
}

pub fn apply<S: Syscall + ?Sized>(cs: &LinuxCapabilities, syscall: &S) -> Result<(), SyscallError> {
apply_bounding(cs, syscall)?;

// if let Some(effective) = cs.effective() {
// if !effective.is_empty() {
// syscall.set_capability(CapSet::Effective, &to_set(effective))?;
// }
// }
//
// if let Some(permitted) = cs.permitted() {
// if !permitted.is_empty() {
// syscall.set_capability(CapSet::Permitted, &to_set(permitted))?;
// }
// }
//
// if let Some(inheritable) = cs.inheritable() {
// if !inheritable.is_empty() {
// syscall.set_capability(CapSet::Inheritable, &to_set(inheritable))?;
// }
// }
//
// if let Some(ambient) = cs.ambient() {
// if !ambient.is_empty() {
// // check specifically for ambient, as those might not always be available
// if let Err(e) = syscall.set_capability(CapSet::Ambient, &to_set(ambient)) {
// tracing::error!("failed to set ambient capabilities: {}", e);
// }
// }
// }

let mut c = Caps::new();
if let Some(effective) = cs.effective() {
syscall.set_capability(CapSet::Effective, &to_set(effective))?;
if !effective.is_empty() {
c.set(CapSet::Effective, &to_set(effective))?;
}
}

if let Some(permitted) = cs.permitted() {
syscall.set_capability(CapSet::Permitted, &to_set(permitted))?;
if !permitted.is_empty() {
c.set(CapSet::Permitted, &to_set(permitted))?;
}
}

if let Some(inheritable) = cs.inheritable() {
syscall.set_capability(CapSet::Inheritable, &to_set(inheritable))?;
if !inheritable.is_empty() {
c.set(CapSet::Inheritable, &to_set(inheritable))?;
}
}
c.apply()?;

if let Some(ambient) = cs.ambient() {
// check specifically for ambient, as those might not always be available
if let Err(e) = syscall.set_capability(CapSet::Ambient, &to_set(ambient)) {
tracing::error!("failed to set ambient capabilities: {}", e);
if !ambient.is_empty() {
caps::set(None, caps::CapSet::Ambient, &to_set(ambient))?;
}
}

Ok(())
}

pub fn keep(keep_caps: bool) -> Result<(), SyscallError> {
prctl::set_keep_capabilities(keep_caps).map_err(|errno| {
// TODO:ERROR MSG
tracing::error!(?errno, "failed to set keep capabilities to false");
nix::errno::from_i32(errno)
})?;
Ok(())
}

// ========================================
// Raw `capset` plumbing: version tag, header/data layouts and the syscall wrapper.
//
/// Version tag written into `CapUserHeader::version` before calling `capset`.
///
/// NOTE(review): the value looks like Linux's `_LINUX_CAPABILITY_VERSION_3`
/// (64-bit capability sets split over two 32-bit words) — confirm against
/// `<linux/capability.h>`.
const CAPS_V3: u32 = 0x20080522;

/// Capability bitmaps handed by pointer to the `capset` system call.
///
/// Each of the three base sets is stored as two 32-bit words: the `*_s0`
/// field carries the bits for capability indices 0..=31 and the `*_s1` field
/// the bits for indices 32..=63 (see `Caps::set`).
///
/// The struct is `#[repr(C)]` and is read by the kernel, so the field order
/// must not be changed.
#[derive(Debug, Default, Clone)]
#[repr(C)]
struct CapUserData {
// Low word (indices 0..=31) of the effective set.
effective_s0: u32,
// Low word (indices 0..=31) of the permitted set.
permitted_s0: u32,
// Low word (indices 0..=31) of the inheritable set.
inheritable_s0: u32,
// High word (indices 32..=63) of the effective set.
effective_s1: u32,
// High word (indices 32..=63) of the permitted set.
permitted_s1: u32,
// High word (indices 32..=63) of the inheritable set.
inheritable_s1: u32,
}

/// Header passed by pointer as the first argument of the `capset` system call.
///
/// `#[repr(C)]` because the kernel reads it directly; keep the field order.
#[derive(Debug)]
#[repr(C)]
struct CapUserHeader {
// Capability ABI version the data block is encoded in (`CAPS_V3` here).
version: u32,
// Target thread id. `Caps::apply` passes 0.
// NOTE(review): per capset(2), 0 should designate the calling thread — confirm.
pid: i32,
}

/// Accumulates the effective, permitted and inheritable capability bitmaps so
/// that they can be handed to the kernel in a single `capset` call.
struct Caps {
    data: CapUserData,
}

impl Caps {
    /// Creates an accumulator whose bitmaps are all zero.
    fn new() -> Self {
        Caps {
            data: Default::default(),
        }
    }

    /// Overwrites the bitmap recorded for `cset` with the capabilities listed
    /// in `value`.
    ///
    /// # Errors
    /// * `SyscallError::SetCaps("not a base set")` when `cset` is `Bounding`
    ///   or `Ambient`; nothing is modified in that case.
    /// * `SyscallError::SetCaps("overlarge capability index N")` when a
    ///   capability index is above 63.
    fn set(&mut self, cset: CapSet, value: &CapsHashSet) -> Result<(), SyscallError> {
        // Pick the (high word, low word) pair belonging to the requested set.
        let (high, low) = match cset {
            CapSet::Permitted => (&mut self.data.permitted_s1, &mut self.data.permitted_s0),
            CapSet::Inheritable => (&mut self.data.inheritable_s1, &mut self.data.inheritable_s0),
            CapSet::Effective => (&mut self.data.effective_s1, &mut self.data.effective_s0),
            CapSet::Bounding | CapSet::Ambient => {
                return Err(SyscallError::SetCaps("not a base set".into()));
            }
        };

        // Start from an empty bitmap, then switch on one bit per capability.
        *high = 0;
        *low = 0;
        for cap in value {
            let idx = cap.index();
            if idx <= 31 {
                *low |= cap.bitmask() as u32;
            } else if idx <= 63 {
                *high |= (cap.bitmask() >> 32) as u32;
            } else {
                return Err(SyscallError::SetCaps(
                    format!("overlarge capability index {}", idx).into(),
                ));
            }
        }

        Ok(())
    }

    /// Submits the accumulated bitmaps with `capset`, using pid 0 and the
    /// `CAPS_V3` header version.
    fn apply(&mut self) -> Result<(), SyscallError> {
        let mut header = CapUserHeader {
            pid: 0,
            version: CAPS_V3,
        };
        capset(&mut header, &self.data)
    }
}

/// Thin wrapper around the raw `capset` system call.
///
/// The syscall number comes from `libc::SYS_capset` rather than a literal:
/// the number differs between architectures (126 is only correct on x86_64).
///
/// # Errors
/// Returns `SyscallError::SetCaps` carrying the OS error text when the call
/// returns a non-zero value.
fn capset(hdr: &mut CapUserHeader, data: &CapUserData) -> Result<(), SyscallError> {
    // SAFETY: both pointers are derived from live references to `#[repr(C)]`
    // structs and stay valid for the whole duration of the call.
    let r = unsafe {
        libc::syscall(
            libc::SYS_capset,
            hdr as *mut CapUserHeader,
            data as *const CapUserData,
        )
    };
    match r {
        0 => Ok(()),
        _ => Err(SyscallError::SetCaps(
            format!("capset failure: {}", std::io::Error::last_os_error()).into(),
        )),
    }
}

#[cfg(test)]
mod tests {
use oci_spec::runtime::LinuxCapabilitiesBuilder;
Expand Down
64 changes: 40 additions & 24 deletions crates/libcontainer/src/process/container_init_process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -487,23 +487,6 @@ pub fn container_init_process(
}
};

set_supplementary_gids(proc.user(), args.rootless, syscall).map_err(|err| {
tracing::error!(?err, "failed to set supplementary gids");
err
})?;

syscall
.set_id(
Uid::from_raw(proc.user().uid()),
Gid::from_raw(proc.user().gid()),
)
.map_err(|err| {
let uid = proc.user().uid();
let gid = proc.user().gid();
tracing::error!(?err, ?uid, ?gid, "failed to set uid and gid");
InitProcessError::SyscallOther(err)
})?;

// Take care of LISTEN_FDS used for systemd-active-socket. If the value is
// not 0, then we have to preserve those fds as well, and set up the correct
// environment variables.
Expand Down Expand Up @@ -582,18 +565,45 @@ pub fn container_init_process(
InitProcessError::SyscallOther(err)
})?;
if let Some(caps) = proc.capabilities() {
capabilities::drop_privileges(caps, syscall).map_err(|err| {
capabilities::apply_bounding(caps, syscall).map_err(|err| {
tracing::error!(?err, "failed to drop capabilities");
InitProcessError::SyscallOther(err)
})?;
}

// Change directory to process.cwd if process.cwd is not empty
if do_chdir {
unistd::chdir(proc.cwd()).map_err(|err| {
let cwd = proc.cwd();
tracing::error!(?err, ?cwd, "failed to chdir to cwd");
InitProcessError::NixOther(err)
keep_caps(|| {
set_supplementary_gids(proc.user(), args.rootless, syscall).map_err(|err| {
tracing::error!(?err, "failed to set supplementary gids");
err
})?;

syscall
.set_id(
Uid::from_raw(proc.user().uid()),
Gid::from_raw(proc.user().gid()),
)
.map_err(|err| {
let uid = proc.user().uid();
let gid = proc.user().gid();
tracing::error!(?err, ?uid, ?gid, "failed to set uid and gid");
InitProcessError::SyscallOther(err)
})?;

// Change directory to process.cwd if process.cwd is not empty
if do_chdir {
unistd::chdir(proc.cwd()).map_err(|err| {
let cwd = proc.cwd();
tracing::error!(?err, ?cwd, "failed to chdir to cwd");
InitProcessError::NixOther(err)
})?;
}
Ok(())
})?;

if let Some(caps) = proc.capabilities() {
capabilities::apply(caps, syscall).map_err(|err| {
tracing::error!(?err, "failed to apply capabilities");
InitProcessError::SyscallOther(err)
})?;
}

Expand Down Expand Up @@ -687,6 +697,12 @@ pub fn container_init_process(
}?
}

/// Runs `f` with the keep-capabilities setting enabled and disables it again
/// afterwards.
///
/// The setting is cleared even when `f` fails, so an error inside the closure
/// cannot leave it switched on. If both `f` and the clean-up fail, the error
/// from `f` is the one returned.
///
/// # Errors
/// * `InitProcessError::SyscallOther` if enabling or disabling the setting fails.
/// * Whatever error `f` returns.
fn keep_caps<F: FnOnce() -> Result<()>>(f: F) -> Result<()> {
    capabilities::keep(true).map_err(InitProcessError::SyscallOther)?;
    let outcome = f();
    // Always attempt the reset, regardless of how `f` ended.
    let restored = capabilities::keep(false).map_err(InitProcessError::SyscallOther);
    outcome.and(restored)
}

// Before 3.19 it was possible for an unprivileged user to enter an user namespace,
// become root and then call setgroups in order to drop membership in supplementary
// groups. This allowed access to files which blocked access based on being a member
Expand Down
75 changes: 74 additions & 1 deletion crates/libcontainer/src/syscall/linux.rs
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,9 @@ impl Syscall for LinuxSyscall {

/// Set capabilities for container process
fn set_capability(&self, cset: CapSet, value: &CapsHashSet) -> Result<()> {
// TODO:
// Bounding -> Ok
// The other sets need to be set all at once.
match cset {
// caps::set cannot set capabilities in bounding set,
// so we do it differently
Expand All @@ -374,7 +377,8 @@ impl Syscall for LinuxSyscall {
}
}
_ => {
caps::set(None, cset, value)?;
// caps::set(None, cset, value)?;
apply_caps(0, cset, value)?;
}
}
Ok(())
Expand Down Expand Up @@ -683,3 +687,72 @@ mod tests {
Ok(())
}
}

/// Version tag written into `CapUserHeader::version` before calling `capset`.
///
/// NOTE(review): the value looks like Linux's `_LINUX_CAPABILITY_VERSION_3`
/// (64-bit capability sets split over two 32-bit words) — confirm against
/// `<linux/capability.h>`.
const CAPS_V3: u32 = 0x20080522;

fn apply_caps(tid: i32, cset: CapSet, value: &CapsHashSet) -> Result<()> {
let mut hdr = CapUserHeader {
version: CAPS_V3,
pid: tid,
};
let mut data: CapUserData = Default::default();
{
let (s1, s0) = match cset {
CapSet::Effective => (&mut data.effective_s1, &mut data.effective_s0),
CapSet::Inheritable => (&mut data.inheritable_s1, &mut data.inheritable_s0),
CapSet::Permitted => (&mut data.permitted_s1, &mut data.permitted_s0),
CapSet::Bounding | CapSet::Ambient => {
return Err(SyscallError::SetCaps("not a base set".into()))
}
};
*s1 = 0;
*s0 = 0;
for c in value {
match c.index() {
0..=31 => {
*s0 |= c.bitmask() as u32;
}
32..=63 => {
*s1 |= (c.bitmask() >> 32) as u32;
}
_ => {
return Err(SyscallError::SetCaps(
format!("overlarge capability index {}", c.index()).into(),
))
}
}
}
}
tracing::warn!("capset({:?}, {:?})", hdr, data);
capset(&mut hdr, &data)
}

/// Thin wrapper around the raw `capset` system call.
///
/// The syscall number comes from `libc::SYS_capset` rather than a literal:
/// the number differs between architectures (126 is only correct on x86_64).
///
/// # Errors
/// Returns `SyscallError::SetCaps` carrying the OS error text when the call
/// returns a non-zero value.
fn capset(hdr: &mut CapUserHeader, data: &CapUserData) -> Result<()> {
    // SAFETY: both pointers are derived from live references to `#[repr(C)]`
    // structs and stay valid for the whole duration of the call.
    let r = unsafe {
        libc::syscall(
            libc::SYS_capset,
            hdr as *mut CapUserHeader,
            data as *const CapUserData,
        )
    };
    match r {
        0 => Ok(()),
        _ => Err(SyscallError::SetCaps(
            format!("capset failure: {}", std::io::Error::last_os_error()).into(),
        )),
    }
}

/// Capability bitmaps handed by pointer to the `capset` system call.
///
/// Each of the three base sets is stored as two 32-bit words: the `*_s0`
/// field carries the bits for capability indices 0..=31 and the `*_s1` field
/// the bits for indices 32..=63 (see `apply_caps`).
///
/// The struct is `#[repr(C)]` and is read by the kernel, so the field order
/// must not be changed.
#[derive(Debug, Default, Clone)]
#[repr(C)]
struct CapUserData {
// Low word (indices 0..=31) of the effective set.
effective_s0: u32,
// Low word (indices 0..=31) of the permitted set.
permitted_s0: u32,
// Low word (indices 0..=31) of the inheritable set.
inheritable_s0: u32,
// High word (indices 32..=63) of the effective set.
effective_s1: u32,
// High word (indices 32..=63) of the permitted set.
permitted_s1: u32,
// High word (indices 32..=63) of the inheritable set.
inheritable_s1: u32,
}

/// Header passed by pointer as the first argument of the `capset` system call.
///
/// `#[repr(C)]` because the kernel reads it directly; keep the field order.
#[derive(Debug)]
#[repr(C)]
struct CapUserHeader {
// Capability ABI version the data block is encoded in (`CAPS_V3` here).
version: u32,
// Target thread id; `apply_caps` fills it from its `tid` argument.
// NOTE(review): per capset(2), 0 should designate the calling thread — confirm.
pid: i32,
}

0 comments on commit d43ceda

Please sign in to comment.