From 3a4fbbde4d7b659f7a3175c8c67616e663b97c3d Mon Sep 17 00:00:00 2001 From: mfranciszkiewicz Date: Sun, 1 Aug 2021 00:14:14 +0200 Subject: [PATCH 1/5] Root and volume overlay filesystem Root filesystem is no longer stored in RAM and limited to 128 MB (by default; can be configured via image metadata <- gvmkit-build ). Volumes now preserve their original contents copied into the VM image. --- Cargo.lock | 4 +- runtime/Cargo.toml | 2 +- runtime/init-container/src/init.c | 212 +++++++++++++++++++++++------- runtime/src/deploy.rs | 43 +++++- runtime/src/main.rs | 48 +++++-- 5 files changed, 238 insertions(+), 71 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 04915bec..5d7fda13 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1,5 +1,7 @@ # This file is automatically @generated by Cargo. # It is not intended for manual editing. +version = 3 + [[package]] name = "actix-codec" version = "0.3.0" @@ -2790,7 +2792,7 @@ dependencies = [ [[package]] name = "ya-runtime-vm" -version = "0.2.8" +version = "0.2.9" dependencies = [ "anyhow", "bollard-stubs", diff --git a/runtime/Cargo.toml b/runtime/Cargo.toml index d2704cf8..2079a236 100644 --- a/runtime/Cargo.toml +++ b/runtime/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ya-runtime-vm" -version = "0.2.8" +version = "0.2.9" authors = ["Golem Factory "] edition = "2018" license = "GPL-3.0" diff --git a/runtime/init-container/src/init.c b/runtime/init-container/src/init.c index 14037ca3..e4db602f 100644 --- a/runtime/init-container/src/init.c +++ b/runtime/init-container/src/init.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "communication.h" #include "cyclic_buffer.h" @@ -40,9 +41,14 @@ .type = REDIRECT_FD_FILE, \ .path = NULL, \ } - #define MODE_RW_UGO (S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH) + +#define USER_FS_TAG "userfs" #define OUTPUT_PATH_PREFIX "/var/tmp/guest_agent_private/fds" +#define VOLUMES_PATH_PREFIX "/var/tmp/guest_agent_private/vols" + +const char* ARG_FS_RAM = "ram"; +const char* ARG_FS_RAM_TMP = "ram-tmp"; struct new_process_args { char* bin; @@ -75,6 +81,9 @@ static int g_sig_fd = -1; static int g_epoll_fd = -1; static struct process_desc* g_entrypoint_desc = NULL; +static struct cmd_args { + char* fs; +} args = { NULL }; static noreturn void die(void) { sync(); @@ -97,6 +106,15 @@ static noreturn void die(void) { _x; \ }) +#define ALLOCC(x) ({ \ + __typeof__(x) _x = (x); \ + if (!_x) { \ + fprintf(stderr, "OOM at %s:%d: %m\n", __FILE__, __LINE__); \ + die(); \ + } \ + _x; \ +}) + static void load_module(const char* path) { int fd = CHECK(open(path, O_RDONLY | O_CLOEXEC)); CHECK(syscall(SYS_finit_module, fd, "", 0)); @@ -260,39 +278,61 @@ static void setup_sigfd(void) { g_sig_fd = CHECK(signalfd(g_sig_fd, &set, SFD_CLOEXEC)); } -static int create_dir_path(char* path) { +static int mkdirp(const char* full_path, mode_t mode) { assert(path[0] == '/'); + char* path = ALLOCC(strdup(full_path)); char* next = path; + int code = 0; + while (1) { next = strchr(next + 1, '/'); if (!next) { break; } *next = '\0'; - int ret = mkdir(path, DEFAULT_DIR_PERMS); + int ret = mkdir(path, mode); *next = '/'; if (ret < 0 && errno != EEXIST) { - return -1; + code = -1; + goto end; } } - if (mkdir(path, DEFAULT_DIR_PERMS) < 0 && errno != EEXIST) { - return -1; + if (mkdir(path, mode) < 0 && errno != EEXIST) { + code = -1; + goto end; } - return 0; + +end: + free(path); + return code; } -static void setup_agent_directories(void) { - char* path = strdup(OUTPUT_PATH_PREFIX); - if (!path) { - fprintf(stderr, "setup_agent_directories OOM\n"); - die(); +static int mkdirsp(const char* paths[], size_t n, mode_t mode) { + int code = 0; + for (size_t i = 0; i < n; ++i) { + if ((code = mkdirp(paths[i], mode)) != 0) { + break; + } } + return code; +} - CHECK(create_dir_path(path)); +static void setup_agent_directories(void) { + char* paths[] = { + OUTPUT_PATH_PREFIX, + VOLUMES_PATH_PREFIX + }; - free(path); + size_t n = sizeof(paths) / sizeof(paths[0]); + for (size_t i = 0; i < n; ++i) { + if (!paths[i]) { + fprintf(stderr, "setup_agent_directories OOM\n"); + die(); + } + CHECK(mkdirp(paths[i], DEFAULT_DIR_PERMS)); + } } static void send_response_hdr(msg_id_t msg_id, enum GUEST_MSG_TYPE type) { @@ -878,13 +918,49 @@ static void handle_kill_process(msg_id_t msg_id) { } static uint32_t do_mount(const char* tag, char* path) { - if (create_dir_path(path) < 0) { - return errno; + char* dirs[] = { NULL, NULL, NULL }; + char* vol_dir = NULL; + char* mnt_dir = NULL; + char* args = NULL; + + size_t dirs_sz = sizeof(dirs) / sizeof(dirs[0]); + + ALLOCC(asprintf(&vol_dir, "%s/%s", VOLUMES_PATH_PREFIX, tag)); + ALLOCC(asprintf(&mnt_dir, "%s/userfs", vol_dir)); + ALLOCC(asprintf(&dirs[0], "%s/imagefs", vol_dir)); + ALLOCC(asprintf(&dirs[1], "%s/fs", mnt_dir)); + ALLOCC(asprintf(&dirs[2], "%s/tmp", mnt_dir)); + + const char* entry_dirs[] = { vol_dir, mnt_dir, path, dirs[0] }; + const char* upper_dirs[] = { dirs[1], dirs[2] }; + + if (mkdirsp(entry_dirs, sizeof(entry_dirs) / sizeof(entry_dirs[0]), S_IRWXU) < 0) { + goto end; } - if (mount(tag, path, "9p", 0, "trans=virtio,version=9p2000.L") < 0) { - return errno; + if (mount(path, dirs[0], "none", MS_BIND | MS_REC, NULL) < 0) { + goto end; } - return 0; + if (mount(tag, mnt_dir, "9p", 0, + "defaults,trans=virtio,version=9p2000.L,nodevmap,redirect_dir=on") < 0) { + goto end; + } + if (mkdirsp(upper_dirs, sizeof(upper_dirs) / sizeof(upper_dirs[0]), S_IRWXU) < 0) { + goto end; + } + + ALLOCC(asprintf(&args, "lowerdir=%s,upperdir=%s,workdir=%s", dirs[0], dirs[1], dirs[2])); + CHECK(mount("overlay", path, "overlay", 0, args)); + + end: + if (vol_dir) free(vol_dir); + if (mnt_dir) free(mnt_dir); + if (args) free(args); + + for (size_t i = 0; i < dirs_sz; ++i) { + free(dirs[i]); + }; + + return errno == EEXIST ? 0 : errno; } static void handle_mount(msg_id_t msg_id) { @@ -923,8 +999,8 @@ static void handle_mount(msg_id_t msg_id) { ret = do_mount(tag, path); out: - free(path); - free(tag); + if (path) free(path); + if (tag) free(tag); if (ret) { send_response_err(msg_id, ret); } else { @@ -1254,19 +1330,39 @@ static noreturn void main_loop(void) { } } -static void create_dir(const char *pathname, mode_t mode) { - if (mkdir(pathname, mode) < 0 && errno != EEXIST) { - fprintf(stderr, "mkdir(%s) failed with: %m\n", pathname); - die(); +struct cmd_args parse_args(int argc, char *argv[]) { + int parsing = 1; + while (parsing) { + static struct option options[] = { + {"fs", required_argument, 0, 'f'}, + {0, 0, 0, 0} + }; + + int i = 0; + int c = getopt_long(argc, argv, "f:", options, &i); + + switch (c) { + case -1: + case 0: + parsing = 0; + break; + case 'f': + args.fs = optarg; + continue; + default: + fprintf(stderr, "arg: %c\n", c); + } } + + return args; } -int main(void) { +int main(int argc, char *argv[]) { setbuf(stdin, NULL); setbuf(stdout, NULL); setbuf(stderr, NULL); - create_dir("/dev", DEFAULT_DIR_PERMS); + CHECK(mkdirp("/dev", DEFAULT_DIR_PERMS)); CHECK(mount("devtmpfs", "/dev", "devtmpfs", MS_NOSUID, "mode=0755,size=2M")); @@ -1286,32 +1382,43 @@ int main(void) { g_cmds_fd = CHECK(open("/dev/vport0p1", O_RDWR | O_CLOEXEC)); - CHECK(mkdir("/mnt", S_IRWXU)); - CHECK(mkdir("/mnt/image", S_IRWXU)); - CHECK(mkdir("/mnt/overlay", S_IRWXU)); - CHECK(mkdir("/mnt/newroot", DEFAULT_DIR_PERMS)); - - // 'workdir' and 'upperdir' have to be on the same filesystem - CHECK(mount("tmpfs", "/mnt/overlay", "tmpfs", - MS_NOSUID, - "mode=0777,size=128M")); - - CHECK(mkdir("/mnt/overlay/upper", S_IRWXU)); - CHECK(mkdir("/mnt/overlay/work", S_IRWXU)); + struct cmd_args args = parse_args(argc, argv); - CHECK(mount("/dev/vda", "/mnt/image", "squashfs", MS_RDONLY, "")); - CHECK(mount("overlay", "/mnt/newroot", "overlay", 0, - "lowerdir=/mnt/image,upperdir=/mnt/overlay/upper,workdir=/mnt/overlay/work")); + CHECK(mkdirp("/mnt/imagefs", S_IRWXU)); + CHECK(mkdirp("/mnt/userfs", S_IRWXU)); + CHECK(mkdirp("/mnt/overlay", DEFAULT_DIR_PERMS)); - CHECK(umount2("/dev", MNT_DETACH)); + CHECK(mount("/dev/vda", "/mnt/imagefs", "squashfs", + MS_RDONLY, + NULL)); + + if (args.fs != NULL && strcmp(args.fs, ARG_FS_RAM) == 0) { + CHECK(mount("tmpfs", "/mnt/userfs", "tmpfs", + 0, + "mode=0777,size=128M")); + } else { + CHECK(mount(USER_FS_TAG, "/mnt/userfs", "9p", + 0, + "defaults,trans=virtio,version=9p2000.L,nodevmap,redirect_dir=on")); + } - CHECK(chdir("/mnt/newroot")); + CHECK(mkdirp("/mnt/userfs/upper", S_IRWXU)); + CHECK(mkdirp("/mnt/userfs/work", S_IRWXU)); + + CHECK(mount("overlay", "/mnt/overlay", "overlay", + 0, + "lowerdir=/mnt/imagefs,upperdir=/mnt/userfs/upper,workdir=/mnt/userfs/work")); + + CHECK(mkdirp("/mnt/overlay/mnt/volumes", S_IRWXU)); + + CHECK(umount2("/dev", MNT_DETACH)); + CHECK(chdir("/mnt/overlay")); CHECK(mount(".", "/", "none", MS_MOVE, NULL)); CHECK(chroot(".")); CHECK(chdir("/")); - create_dir("/dev", DEFAULT_DIR_PERMS); - create_dir("/tmp", DEFAULT_DIR_PERMS); + CHECK(mkdirp("/dev", DEFAULT_DIR_PERMS)); + CHECK(mkdirp("/tmp", DEFAULT_DIR_PERMS)); CHECK(mount("proc", "/proc", "proc", MS_NODEV | MS_NOSUID | MS_NOEXEC, @@ -1322,12 +1429,15 @@ int main(void) { CHECK(mount("devtmpfs", "/dev", "devtmpfs", MS_NOSUID, "exec,mode=0755,size=2M")); - CHECK(mount("tmpfs", "/tmp", "tmpfs", - MS_NOSUID, - "mode=0777")); - create_dir("/dev/pts", DEFAULT_DIR_PERMS); - create_dir("/dev/shm", DEFAULT_DIR_PERMS); + if (args.fs != NULL && strcmp(args.fs, ARG_FS_RAM_TMP) == 0) { + CHECK(mount("tmpfs", "/tmp", "tmpfs", + MS_NOSUID, + "mode=0777")); + } + + CHECK(mkdirp("/dev/pts", DEFAULT_DIR_PERMS)); + CHECK(mkdirp("/dev/shm", DEFAULT_DIR_PERMS)); CHECK(mount("devpts", "/dev/pts", "devpts", MS_NOSUID | MS_NOEXEC, diff --git a/runtime/src/deploy.rs b/runtime/src/deploy.rs index 785d1670..05e4f6bf 100644 --- a/runtime/src/deploy.rs +++ b/runtime/src/deploy.rs @@ -19,7 +19,33 @@ pub struct Deployment { pub task_package: PathBuf, pub user: (u32, u32), pub volumes: Vec, - pub config: ContainerConfig, + pub config: Config, +} + +#[derive(Debug, Default, Deserialize, Serialize)] +pub struct Config { + #[serde(flatten)] + pub container: ContainerConfig, + #[serde(default)] + pub fs: Fs, +} + +/// Root filesystem overlay mode +#[derive(Debug, Deserialize, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum Fs { + /// Mount the overlay on disk (default) + Disk, + /// Keep the overlay in RAM (limit: 128 MB) + Ram, + /// Mount the overlay on disk but keep /tmp in RAM (limit: 128 MB) + RamTmp, +} + +impl Default for Fs { + fn default() -> Self { + Self::Disk + } } impl Deployment { @@ -55,24 +81,33 @@ impl Deployment { return Err(anyhow::anyhow!("Invalid ContainerConfig crc32 sum")); } - let config: ContainerConfig = serde_json::from_str(&json)?; + let config: Config = serde_json::from_str(&json)?; Ok(Deployment { cpu_cores, mem_mib, task_package, - user: parse_user(config.user.as_ref())?, - volumes: parse_volumes(config.volumes.as_ref()), + user: parse_user(config.container.user.as_ref())?, + volumes: parse_volumes(config.container.volumes.as_ref()), config, }) } pub fn env(&self) -> Vec<&str> { self.config + .container .env .as_ref() .map(|v| v.iter().map(|s| s.as_str()).collect()) .unwrap_or_else(Vec::new) } + + pub fn init_args(&self) -> &'static str { + match &self.config.fs { + Fs::Ram => "-f ram", + Fs::RamTmp => "-f ram-tmp", + _ => "-f disk", + } + } } fn parse_user(user: Option<&String>) -> anyhow::Result<(u32, u32)> { diff --git a/runtime/src/main.rs b/runtime/src/main.rs index b62bc53e..c5b80316 100644 --- a/runtime/src/main.rs +++ b/runtime/src/main.rs @@ -23,7 +23,7 @@ use ya_runtime_sdk::{ }; use ya_runtime_vm::{ cpu::CpuInfo, - deploy::Deployment, + deploy::{Deployment, Fs}, guest_agent_comm::{GuestAgent, Notification, RedirectFdType, RemoteCommandResult}, }; @@ -34,6 +34,7 @@ const FILE_INITRAMFS: &'static str = "initramfs.cpio.gz"; const FILE_TEST_IMAGE: &'static str = "self-test.gvmi"; const FILE_DEPLOYMENT: &'static str = "deployment.json"; const DEFAULT_CWD: &'static str = "/"; +const USER_FS_TAG: &'static str = "userfs"; #[derive(StructOpt, Clone, Default)] #[structopt(rename_all = "kebab-case")] @@ -182,6 +183,9 @@ async fn deploy(workdir: PathBuf, cli: Cli) -> anyhow::Result {} + _ => { + cmd.arg("-virtfs"); + cmd.arg(format!( + "local,id={tag},path={path},security_model=mapped,mount_tag={tag}", + tag = USER_FS_TAG, + path = work_dir.join(USER_FS_TAG).to_string_lossy(), + )); + } + } + for (idx, volume) in deployment.volumes.iter().enumerate() { cmd.arg("-virtfs"); cmd.arg(format!( - "local,id={tag},path={path},security_model=none,mount_tag={tag}", + "local,id={tag},path={path},security_model=mapped,mount_tag={tag}", tag = format!("mnt{}", idx), path = work_dir.join(&volume.name).to_string_lossy(), )); @@ -306,6 +322,7 @@ async fn run_command( let env = deployment.env(); let cwd = deployment .config + .container .working_dir .as_ref() .filter(|s| !s.trim().is_empty()) @@ -313,7 +330,7 @@ async fn run_command( .unwrap_or_else(|| DEFAULT_CWD); log::debug!("got run process: {:?}", run); - log::debug!("work dir: {:?}", deployment.config.working_dir); + log::debug!("work dir: {:?}", deployment.config.container.working_dir); let result = data .ga() @@ -397,18 +414,21 @@ async fn test() -> anyhow::Result<()> { .expect("Test image not found"); println!("Task package: {}", task_package.display()); - let runtime_data = RuntimeData { - runtime: None, - ga: None, - deployment: Some(Deployment { - cpu_cores: 1, - mem_mib: 128, - task_package, - ..Deployment::default() - }), + + let mut deployment = Deployment { + cpu_cores: 1, + mem_mib: 128, + task_package, + ..Deployment::default() }; + deployment.config.fs = Fs::Ram; + let runtime = Runtime { - data: Arc::new(Mutex::new(runtime_data)), + data: Arc::new(Mutex::new(RuntimeData { + runtime: None, + ga: None, + deployment: Some(deployment), + })), }; println!("Starting runtime"); From c65cfc0574fc681aabe91b7ffee8e433f758d617 Mon Sep 17 00:00:00 2001 From: mfranciszkiewicz Date: Sun, 1 Aug 2021 15:54:14 +0200 Subject: [PATCH 2/5] ExeUnit transfer service compatibility --- runtime/init-container/src/init.c | 74 +++++++++++++---------------- runtime/src/deploy.rs | 79 +++++++++++++++++++++++++------ runtime/src/main.rs | 56 +++++++++------------- 3 files changed, 122 insertions(+), 87 deletions(-) diff --git a/runtime/init-container/src/init.c b/runtime/init-container/src/init.c index e4db602f..fcff28b3 100644 --- a/runtime/init-container/src/init.c +++ b/runtime/init-container/src/init.c @@ -309,16 +309,6 @@ static int mkdirp(const char* full_path, mode_t mode) { return code; } -static int mkdirsp(const char* paths[], size_t n, mode_t mode) { - int code = 0; - for (size_t i = 0; i < n; ++i) { - if ((code = mkdirp(paths[i], mode)) != 0) { - break; - } - } - return code; -} - static void setup_agent_directories(void) { char* paths[] = { OUTPUT_PATH_PREFIX, @@ -918,49 +908,53 @@ static void handle_kill_process(msg_id_t msg_id) { } static uint32_t do_mount(const char* tag, char* path) { - char* dirs[] = { NULL, NULL, NULL }; - char* vol_dir = NULL; - char* mnt_dir = NULL; + char* args_9p = "defaults,trans=virtio,version=9p2000.L,nodevmap,redirect_dir=on"; char* args = NULL; - size_t dirs_sz = sizeof(dirs) / sizeof(dirs[0]); + size_t dirs_sz = 5; + char** dirs = NULL; - ALLOCC(asprintf(&vol_dir, "%s/%s", VOLUMES_PATH_PREFIX, tag)); - ALLOCC(asprintf(&mnt_dir, "%s/userfs", vol_dir)); - ALLOCC(asprintf(&dirs[0], "%s/imagefs", vol_dir)); - ALLOCC(asprintf(&dirs[1], "%s/fs", mnt_dir)); - ALLOCC(asprintf(&dirs[2], "%s/tmp", mnt_dir)); + if ((dirs = malloc(sizeof(char*) * dirs_sz)) == NULL) { + return -ENOMEM; + } + memset(dirs, 0, dirs_sz); - const char* entry_dirs[] = { vol_dir, mnt_dir, path, dirs[0] }; - const char* upper_dirs[] = { dirs[1], dirs[2] }; + ALLOCC(asprintf(&dirs[0], "%s/%s", VOLUMES_PATH_PREFIX, tag)); + ALLOCC(asprintf(&dirs[1], "%s/mnt", dirs[0])); + ALLOCC(asprintf(&dirs[2], "%s/lower", dirs[0])); + ALLOCC(asprintf(&dirs[3], "%s/upper", dirs[1])); + ALLOCC(asprintf(&dirs[4], "%s/work", dirs[1])); + ALLOCC(asprintf(&args, + "lowerdir=%s,upperdir=%s,workdir=%s", + dirs[2], dirs[3], dirs[4])); - if (mkdirsp(entry_dirs, sizeof(entry_dirs) / sizeof(entry_dirs[0]), S_IRWXU) < 0) { - goto end; - } - if (mount(path, dirs[0], "none", MS_BIND | MS_REC, NULL) < 0) { - goto end; - } - if (mount(tag, mnt_dir, "9p", 0, - "defaults,trans=virtio,version=9p2000.L,nodevmap,redirect_dir=on") < 0) { + if (mkdirp(path, S_IRWXU) != 0) goto end; + if (mkdirp(dirs[1], S_IRWXU) != 0) goto end; + if (mkdirp(dirs[2], S_IRWXU) != 0) goto end; + + if (mount(tag, dirs[1], "9p", 0, args_9p) != 0) { goto end; } - if (mkdirsp(upper_dirs, sizeof(upper_dirs) / sizeof(upper_dirs[0]), S_IRWXU) < 0) { + + if (mount(path, dirs[2], "none", MS_BIND | MS_REC, NULL) != 0) { goto end; } - ALLOCC(asprintf(&args, "lowerdir=%s,upperdir=%s,workdir=%s", dirs[0], dirs[1], dirs[2])); - CHECK(mount("overlay", path, "overlay", 0, args)); + if (mkdirp(dirs[3], S_IRWXU) != 0) goto end; + if (mkdirp(dirs[4], S_IRWXU) != 0) goto end; - end: - if (vol_dir) free(vol_dir); - if (mnt_dir) free(mnt_dir); - if (args) free(args); + if (mount("overlay", path, "overlay", 0, args) != 0) { + goto end; + } +end: + if (dirs) { for (size_t i = 0; i < dirs_sz; ++i) { - free(dirs[i]); - }; - - return errno == EEXIST ? 0 : errno; + if (dirs[i]) free(dirs[i]); + } + free(dirs); + } + return errno == EEXIST ? 0 : errno; } static void handle_mount(msg_id_t msg_id) { diff --git a/runtime/src/deploy.rs b/runtime/src/deploy.rs index 05e4f6bf..fcf390c3 100644 --- a/runtime/src/deploy.rs +++ b/runtime/src/deploy.rs @@ -3,12 +3,14 @@ use crc::crc32; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use std::io::SeekFrom; -use std::path::PathBuf; -use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt}; +use std::path::{Path, PathBuf}; +use tokio::io::AsyncReadExt; use tokio_byteorder::LittleEndian; use uuid::Uuid; use ya_runtime_sdk::runtime_api::deploy::ContainerVolume; +const VOLUME_OVERLAY_UPPER_DIR: &'static str = "upper"; + #[derive(Debug, Default, Deserialize, Serialize)] pub struct Deployment { #[serde(default)] @@ -18,7 +20,7 @@ pub struct Deployment { #[serde(default)] pub task_package: PathBuf, pub user: (u32, u32), - pub volumes: Vec, + pub volumes: Vec, pub config: Config, } @@ -30,6 +32,39 @@ pub struct Config { pub fs: Fs, } +#[derive(Clone, Debug, Default, Deserialize, Serialize)] +pub struct Volume { + pub base_name: String, + pub name: String, + pub path: String, +} + +impl Volume { + pub fn tag(&self, idx: usize) -> String { + format!("mnt{}", idx) + } + + pub fn base_dir>(&self, path: P) -> String { + path.as_ref() + .join(&self.base_name) + .to_string_lossy() + .to_string() + } + + pub fn dir>(&self, path: P) -> String { + path.as_ref().join(&self.name).to_string_lossy().to_string() + } +} + +impl Into for Volume { + fn into(self) -> ContainerVolume { + ContainerVolume { + name: self.name, + path: self.path, + } + } +} + /// Root filesystem overlay mode #[derive(Debug, Deserialize, Serialize)] #[serde(rename_all = "snake_case")] @@ -42,6 +77,15 @@ pub enum Fs { RamTmp, } +impl Fs { + pub fn in_memory(&self) -> bool { + match self { + Self::Ram => true, + _ => false, + } + } +} + impl Default for Fs { fn default() -> Self { Self::Disk @@ -49,15 +93,13 @@ impl Default for Fs { } impl Deployment { - pub async fn try_from_input( - mut input: Input, + pub async fn try_from_input( + task_package: PathBuf, cpu_cores: usize, mem_mib: usize, - task_package: PathBuf, - ) -> Result - where - Input: AsyncRead + AsyncSeek + Unpin, - { + ) -> Result { + let mut input = tokio::fs::File::open(&task_package).await?; + let json_len: u32 = { let mut buf = [0; 8]; input.seek(SeekFrom::End(-8)).await?; @@ -101,6 +143,10 @@ impl Deployment { .unwrap_or_else(Vec::new) } + pub fn volumes(&self) -> Vec { + self.volumes.iter().cloned().map(Into::into).collect() + } + pub fn init_args(&self) -> &'static str { match &self.config.fs { Fs::Ram => "-f ram", @@ -128,16 +174,21 @@ fn parse_user(user: Option<&String>) -> anyhow::Result<(u32, u32)> { Ok((uid, gid)) } -fn parse_volumes(volumes: Option<&HashMap>>) -> Vec { +fn parse_volumes(volumes: Option<&HashMap>>) -> Vec { let volumes = match volumes { Some(v) => v, _ => return Vec::new(), }; volumes .keys() - .map(|key| ContainerVolume { - name: format!("vol-{}", Uuid::new_v4()), - path: key.to_string(), + .map(|key| { + let base_name = format!("vol-{}", Uuid::new_v4()); + let name = format!("{}/{}", base_name, VOLUME_OVERLAY_UPPER_DIR); + Volume { + base_name, + name, + path: key.to_string(), + } }) .collect() } diff --git a/runtime/src/main.rs b/runtime/src/main.rs index c5b80316..2311e16a 100644 --- a/runtime/src/main.rs +++ b/runtime/src/main.rs @@ -167,25 +167,19 @@ impl ya_runtime_sdk::Runtime for Runtime { async fn deploy(workdir: PathBuf, cli: Cli) -> anyhow::Result { let workdir = normalize_path(&workdir).await?; - let package_path = normalize_path(&cli.task_package.unwrap()).await?; - let package_file = fs::File::open(&package_path).await?; - - let deployment = Deployment::try_from_input( - package_file, - cli.cpu_cores, - (cli.mem_gib * 1024.) as usize, - package_path, - ) - .await - .expect("Error reading package metadata"); + let package = normalize_path(&cli.task_package.unwrap()).await?; + let deployment = + Deployment::try_from_input(package, cli.cpu_cores, (cli.mem_gib * 1024.) as usize) + .await + .expect("Error reading package metadata"); + if !deployment.config.fs.in_memory() { + fs::create_dir_all(workdir.join(USER_FS_TAG)).await?; + } for vol in &deployment.volumes { - fs::create_dir_all(workdir.join(&vol.name)).await?; + fs::create_dir_all(vol.dir(&workdir)).await?; } - let rootfs_path = workdir.join(USER_FS_TAG); - fs::create_dir_all(&rootfs_path).await?; - fs::OpenOptions::new() .create(true) .write(true) @@ -197,7 +191,7 @@ async fn deploy(workdir: PathBuf, cli: Cli) -> anyhow::Result {} - _ => { - cmd.arg("-virtfs"); - cmd.arg(format!( - "local,id={tag},path={path},security_model=mapped,mount_tag={tag}", - tag = USER_FS_TAG, - path = work_dir.join(USER_FS_TAG).to_string_lossy(), - )); - } + if !deployment.config.fs.in_memory() { + cmd.arg("-virtfs"); + cmd.arg(format!( + "local,id={tag},path={path},security_model=mapped,mount_tag={tag}", + tag = USER_FS_TAG, + path = work_dir.join(USER_FS_TAG).to_string_lossy(), + )); } - for (idx, volume) in deployment.volumes.iter().enumerate() { + for (idx, vol) in deployment.volumes.iter().enumerate() { cmd.arg("-virtfs"); cmd.arg(format!( "local,id={tag},path={path},security_model=mapped,mount_tag={tag}", - tag = format!("mnt{}", idx), - path = work_dir.join(&volume.name).to_string_lossy(), + tag = vol.tag(idx), + path = vol.base_dir(&work_dir), // note the `base_dir` here )); } @@ -283,8 +274,7 @@ async fn start( .kill_on_drop(true) .spawn()?; - let stdout = runtime.stdout.take().unwrap(); - spawn(reader_to_log(stdout)); + spawn(reader_to_log(runtime.stdout.take().unwrap())); let ga = GuestAgent::connected(socket_path, 10, move |notification, ga| { let mut emitter = emitter.clone(); @@ -298,8 +288,8 @@ async fn start( { let mut ga = ga.lock().await; - for (idx, volume) in deployment.volumes.iter().enumerate() { - ga.mount(format!("mnt{}", idx).as_str(), volume.path.as_str()) + for (idx, vol) in deployment.volumes.iter().enumerate() { + ga.mount(vol.tag(idx).as_str(), vol.path.as_str()) .await? .expect("Mount failed"); } From 0b39dc263b4137eda5c778b064c800ce1c010b4e Mon Sep 17 00:00:00 2001 From: mfranciszkiewicz Date: Sun, 1 Aug 2021 16:08:54 +0200 Subject: [PATCH 3/5] Cleanup unused directories --- runtime/init-container/src/init.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/runtime/init-container/src/init.c b/runtime/init-container/src/init.c index fcff28b3..c1d733c1 100644 --- a/runtime/init-container/src/init.c +++ b/runtime/init-container/src/init.c @@ -1403,8 +1403,6 @@ int main(int argc, char *argv[]) { 0, "lowerdir=/mnt/imagefs,upperdir=/mnt/userfs/upper,workdir=/mnt/userfs/work")); - CHECK(mkdirp("/mnt/overlay/mnt/volumes", S_IRWXU)); - CHECK(umount2("/dev", MNT_DETACH)); CHECK(chdir("/mnt/overlay")); CHECK(mount(".", "/", "none", MS_MOVE, NULL)); From dcdffcab5871b8595652ca895847dcdeabe07a2b Mon Sep 17 00:00:00 2001 From: mfranciszkiewicz Date: Mon, 2 Aug 2021 12:25:29 +0200 Subject: [PATCH 4/5] `Fs` kebab-case serialization --- runtime/src/deploy.rs | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/runtime/src/deploy.rs b/runtime/src/deploy.rs index fcf390c3..6160bd9b 100644 --- a/runtime/src/deploy.rs +++ b/runtime/src/deploy.rs @@ -67,7 +67,7 @@ impl Into for Volume { /// Root filesystem overlay mode #[derive(Debug, Deserialize, Serialize)] -#[serde(rename_all = "snake_case")] +#[serde(rename_all = "kebab-case")] pub enum Fs { /// Mount the overlay on disk (default) Disk, @@ -147,12 +147,8 @@ impl Deployment { self.volumes.iter().cloned().map(Into::into).collect() } - pub fn init_args(&self) -> &'static str { - match &self.config.fs { - Fs::Ram => "-f ram", - Fs::RamTmp => "-f ram-tmp", - _ => "-f disk", - } + pub fn init_args(&self) -> String { + format!("-f {}", serde_json::to_string(&self.config.fs).unwrap()) } } From a2495d2c6779046cd9e118b1d3671bf9427e8fab Mon Sep 17 00:00:00 2001 From: mfranciszkiewicz Date: Mon, 2 Aug 2021 13:15:21 +0200 Subject: [PATCH 5/5] serde rename `Config::fs` to "Filesystem" extra init logs regarding root fs persistence --- runtime/init-container/src/init.c | 3 +++ runtime/src/deploy.rs | 1 + 2 files changed, 4 insertions(+) diff --git a/runtime/init-container/src/init.c b/runtime/init-container/src/init.c index c1d733c1..0979e468 100644 --- a/runtime/init-container/src/init.c +++ b/runtime/init-container/src/init.c @@ -1387,10 +1387,12 @@ int main(int argc, char *argv[]) { NULL)); if (args.fs != NULL && strcmp(args.fs, ARG_FS_RAM) == 0) { + fprintf(stderr, "mounting root fs as tmpfs\n"); CHECK(mount("tmpfs", "/mnt/userfs", "tmpfs", 0, "mode=0777,size=128M")); } else { + fprintf(stderr, "mounting root fs as 9p\n"); CHECK(mount(USER_FS_TAG, "/mnt/userfs", "9p", 0, "defaults,trans=virtio,version=9p2000.L,nodevmap,redirect_dir=on")); @@ -1423,6 +1425,7 @@ int main(int argc, char *argv[]) { "exec,mode=0755,size=2M")); if (args.fs != NULL && strcmp(args.fs, ARG_FS_RAM_TMP) == 0) { + fprintf(stderr, "mounting /tmp as tmpfs\n"); CHECK(mount("tmpfs", "/tmp", "tmpfs", MS_NOSUID, "mode=0777")); diff --git a/runtime/src/deploy.rs b/runtime/src/deploy.rs index 6160bd9b..e0108f64 100644 --- a/runtime/src/deploy.rs +++ b/runtime/src/deploy.rs @@ -28,6 +28,7 @@ pub struct Deployment { pub struct Config { #[serde(flatten)] pub container: ContainerConfig, + #[serde(rename = "Filesystem")] #[serde(default)] pub fs: Fs, }