diff --git a/e2e/containerd/seccomp_default_profile/arch/amd64.json b/e2e/containerd/seccomp_default_profile/arch/amd64.json new file mode 100644 index 00000000000..4014efd3685 --- /dev/null +++ b/e2e/containerd/seccomp_default_profile/arch/amd64.json @@ -0,0 +1,9 @@ +{ + "syscalls": [{ + "names": [ + "arch_prctl", + "modify_ldt" + ], + "action": "SCMP_ACT_ALLOW" + }] +} diff --git a/e2e/containerd/seccomp_default_profile/arch/arm.json b/e2e/containerd/seccomp_default_profile/arch/arm.json new file mode 100644 index 00000000000..4ced98bfce0 --- /dev/null +++ b/e2e/containerd/seccomp_default_profile/arch/arm.json @@ -0,0 +1,13 @@ +{ + "syscalls": [{ + "names": [ + "arm_fadvise64_64", + "arm_sync_file_range", + "sync_file_range2", + "breakpoint", + "cacheflush", + "set_tls" + ], + "action": "SCMP_ACT_ALLOW" + }] +} diff --git a/e2e/containerd/seccomp_default_profile/base.json b/e2e/containerd/seccomp_default_profile/base.json new file mode 100644 index 00000000000..539c6388388 --- /dev/null +++ b/e2e/containerd/seccomp_default_profile/base.json @@ -0,0 +1,443 @@ +{ + "defaultAction": "SCMP_ACT_ERRNO", + "syscalls": [ + { + "names": [ + "accept", + "accept4", + "access", + "adjtimex", + "alarm", + "bind", + "brk", + "cachestat", + "capget", + "capset", + "chdir", + "chmod", + "chown", + "chown32", + "clock_adjtime", + "clock_adjtime64", + "clock_getres", + "clock_getres_time64", + "clock_gettime", + "clock_gettime64", + "clock_nanosleep", + "clock_nanosleep_time64", + "close", + "close_range", + "connect", + "copy_file_range", + "creat", + "dup", + "dup2", + "dup3", + "epoll_create", + "epoll_create1", + "epoll_ctl", + "epoll_ctl_old", + "epoll_pwait", + "epoll_pwait2", + "epoll_wait", + "epoll_wait_old", + "eventfd", + "eventfd2", + "execve", + "execveat", + "exit", + "exit_group", + "faccessat", + "faccessat2", + "fadvise64", + "fadvise64_64", + "fallocate", + "fanotify_mark", + "fchdir", + "fchmod", + "fchmodat", + "fchmodat2", + "fchown", + "fchown32", + 
"fchownat", + "fcntl", + "fcntl64", + "fdatasync", + "fgetxattr", + "flistxattr", + "flock", + "fork", + "fremovexattr", + "fsetxattr", + "fstat", + "fstat64", + "fstatat64", + "fstatfs", + "fstatfs64", + "fsync", + "ftruncate", + "ftruncate64", + "futex", + "futex_requeue", + "futex_time64", + "futex_wait", + "futex_waitv", + "futex_wake", + "futimesat", + "getcpu", + "getcwd", + "getdents", + "getdents64", + "getegid", + "getegid32", + "geteuid", + "geteuid32", + "getgid", + "getgid32", + "getgroups", + "getgroups32", + "getitimer", + "getpeername", + "getpgid", + "getpgrp", + "getpid", + "getppid", + "getpriority", + "getrandom", + "getresgid", + "getresgid32", + "getresuid", + "getresuid32", + "getrlimit", + "get_robust_list", + "getrusage", + "getsid", + "getsockname", + "getsockopt", + "get_thread_area", + "gettid", + "gettimeofday", + "getuid", + "getuid32", + "getxattr", + "inotify_add_watch", + "inotify_init", + "inotify_init1", + "inotify_rm_watch", + "io_cancel", + "ioctl", + "io_destroy", + "io_getevents", + "io_pgetevents", + "io_pgetevents_time64", + "ioprio_get", + "ioprio_set", + "io_setup", + "io_submit", + "io_uring_enter", + "io_uring_register", + "io_uring_setup", + "ipc", + "kill", + "landlock_add_rule", + "landlock_create_ruleset", + "landlock_restrict_self", + "lchown", + "lchown32", + "lgetxattr", + "link", + "linkat", + "listen", + "listxattr", + "llistxattr", + "_llseek", + "lremovexattr", + "lseek", + "lsetxattr", + "lstat", + "lstat64", + "madvise", + "membarrier", + "memfd_create", + "memfd_secret", + "mincore", + "mkdir", + "mkdirat", + "mknod", + "mknodat", + "mlock", + "mlock2", + "mlockall", + "map_shadow_stack", + "mmap", + "mmap2", + "mprotect", + "mq_getsetattr", + "mq_notify", + "mq_open", + "mq_timedreceive", + "mq_timedreceive_time64", + "mq_timedsend", + "mq_timedsend_time64", + "mq_unlink", + "mremap", + "msgctl", + "msgget", + "msgrcv", + "msgsnd", + "msync", + "munlock", + "munlockall", + "munmap", + "name_to_handle_at", + 
"nanosleep", + "newfstatat", + "_newselect", + "open", + "openat", + "openat2", + "pause", + "pidfd_open", + "pidfd_send_signal", + "pipe", + "pipe2", + "pkey_alloc", + "pkey_free", + "pkey_mprotect", + "poll", + "ppoll", + "ppoll_time64", + "prctl", + "pread64", + "preadv", + "preadv2", + "prlimit64", + "process_mrelease", + "pselect6", + "pselect6_time64", + "pwrite64", + "pwritev", + "pwritev2", + "read", + "readahead", + "readlink", + "readlinkat", + "readv", + "recv", + "recvfrom", + "recvmmsg", + "recvmmsg_time64", + "recvmsg", + "remap_file_pages", + "removexattr", + "rename", + "renameat", + "renameat2", + "restart_syscall", + "rmdir", + "rseq", + "rt_sigaction", + "rt_sigpending", + "rt_sigprocmask", + "rt_sigqueueinfo", + "rt_sigreturn", + "rt_sigsuspend", + "rt_sigtimedwait", + "rt_sigtimedwait_time64", + "rt_tgsigqueueinfo", + "sched_getaffinity", + "sched_getattr", + "sched_getparam", + "sched_get_priority_max", + "sched_get_priority_min", + "sched_getscheduler", + "sched_rr_get_interval", + "sched_rr_get_interval_time64", + "sched_setaffinity", + "sched_setattr", + "sched_setparam", + "sched_setscheduler", + "sched_yield", + "seccomp", + "select", + "semctl", + "semget", + "semop", + "semtimedop", + "semtimedop_time64", + "send", + "sendfile", + "sendfile64", + "sendmmsg", + "sendmsg", + "sendto", + "setfsgid", + "setfsgid32", + "setfsuid", + "setfsuid32", + "setgid", + "setgid32", + "setgroups", + "setgroups32", + "setitimer", + "setpgid", + "setpriority", + "setregid", + "setregid32", + "setresgid", + "setresgid32", + "setresuid", + "setresuid32", + "setreuid", + "setreuid32", + "setrlimit", + "set_robust_list", + "setsid", + "setsockopt", + "set_thread_area", + "set_tid_address", + "setuid", + "setuid32", + "setxattr", + "shmat", + "shmctl", + "shmdt", + "shmget", + "shutdown", + "sigaltstack", + "signalfd", + "signalfd4", + "sigprocmask", + "sigreturn", + "socketcall", + "socketpair", + "splice", + "stat", + "stat64", + "statfs", + "statfs64", + 
"statx", + "symlink", + "symlinkat", + "sync", + "sync_file_range", + "syncfs", + "sysinfo", + "tee", + "tgkill", + "time", + "timer_create", + "timer_delete", + "timer_getoverrun", + "timer_gettime", + "timer_gettime64", + "timer_settime", + "timer_settime64", + "timerfd_create", + "timerfd_gettime", + "timerfd_gettime64", + "timerfd_settime", + "timerfd_settime64", + "times", + "tkill", + "truncate", + "truncate64", + "ugetrlimit", + "umask", + "uname", + "unlink", + "unlinkat", + "utime", + "utimensat", + "utimensat_time64", + "utimes", + "vfork", + "vmsplice", + "wait4", + "waitid", + "waitpid", + "write", + "writev" + ], + "action": "SCMP_ACT_ALLOW" + }, + { + "names": [ + "socket" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 40, + "op": "SCMP_CMP_NE" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 0, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 8, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 131072, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 131080, + "op": "SCMP_CMP_EQ" + } + ] + }, + { + "names": [ + "personality" + ], + "action": "SCMP_ACT_ALLOW", + "args": [ + { + "index": 0, + "value": 4294967295, + "op": "SCMP_CMP_EQ" + } + ] + } + ] +} \ No newline at end of file diff --git a/e2e/containerd/seccomp_default_profile/kernel/48.json b/e2e/containerd/seccomp_default_profile/kernel/48.json new file mode 100644 index 00000000000..4ced98bfce0 --- /dev/null +++ b/e2e/containerd/seccomp_default_profile/kernel/48.json @@ -0,0 +1,13 @@ +{ + "syscalls": [{ + "names": [ + "arm_fadvise64_64", + "arm_sync_file_range", + "sync_file_range2", + "breakpoint", + "cacheflush", 
+ "set_tls" + ], + "action": "SCMP_ACT_ALLOW" + }] +} diff --git a/e2e/scenario_helpers_test.go b/e2e/scenario_helpers_test.go index 3e49cbc0244..fe86828c7c6 100644 --- a/e2e/scenario_helpers_test.go +++ b/e2e/scenario_helpers_test.go @@ -126,6 +126,9 @@ func createAndValidateVM(ctx context.Context, t *testing.T, scenario *Scenario) validateWasm(ctx, t, scenario.Runtime.Cluster.Kube, nodeName) } + if scenario.Tags.KubeletCustomConfig { + createCustomKubeletConfigDebugPod(ctx, t, scenario.Runtime.Cluster.Kube, nodeName, scenario.Tags.Airgap) + } t.Logf("node %s is ready, proceeding with validation commands...", vmssName) vmPrivateIP, err := getVMPrivateIPAddress(ctx, *scenario.Runtime.Cluster.Model.Properties.NodeResourceGroup, vmssName) @@ -182,7 +185,7 @@ func getCustomScriptExtensionStatus(ctx context.Context, t *testing.T, resourceG if resp.ExitCode != "0" { return fmt.Errorf("vmssCSE %s, output=%s, error=%s", resp.ExitCode, resp.Output, resp.Error) } - t.Logf("CSE completed successfully with exit code 0, cse output: %s", *status.Message) + t.Logf("CSE completed successfully with exit code %s, cse output: %s", resp.ExitCode, *status.Message) return nil } } diff --git a/e2e/scenario_test.go b/e2e/scenario_test.go index 4125721440a..ec9b30eb26e 100644 --- a/e2e/scenario_test.go +++ b/e2e/scenario_test.go @@ -1257,6 +1257,8 @@ func Test_AzureLinuxV2MessageOfTheDay(t *testing.T) { func Test_Ubuntu2204_KubeletCustomConfig(t *testing.T) { kubeletConfigFilePath := "/etc/default/kubeletconfig.json" + // as in template.go + defaultProfileContainerName := "runtime-default-container" RunScenario(t, &Scenario{ Tags: Tags{ KubeletCustomConfig: true, @@ -1277,6 +1279,7 @@ func Test_Ubuntu2204_KubeletCustomConfig(t *testing.T) { LiveVMValidators: []*LiveVMValidator{ KubeletHasConfigFlagsValidator(kubeletConfigFilePath), FileHasContentsValidator(kubeletConfigFilePath, "\"seccompDefault\": true"), + SeccompProfileValidator("base", defaultProfileContainerName), }, }, }) @@ 
-1301,6 +1304,7 @@ func Test_AzureLinuxV2_KubeletCustomConfig(t *testing.T) { nbc.AgentPoolProfile.CustomKubeletConfig = customKubeletConfig nbc.ContainerService.Properties.AgentPoolProfiles[0].CustomKubeletConfig = customKubeletConfig }, + LiveVMValidators: []*LiveVMValidator{ KubeletHasConfigFlagsValidator(kubeletConfigFilePath), FileHasContentsValidator(kubeletConfigFilePath, "\"seccompDefault\": true"), diff --git a/e2e/template.go b/e2e/template.go index b06f2af462b..4e1fde21dcf 100644 --- a/e2e/template.go +++ b/e2e/template.go @@ -540,10 +540,7 @@ func baseTemplate(location string) *datamodel.NodeBootstrappingConfiguration { } func getHTTPServerTemplate(podName, nodeName string, isAirgap bool) string { - image := "mcr.microsoft.com/cbl-mariner/busybox:2.0" - if isAirgap { - image = fmt.Sprintf("%s.azurecr.io/aks/cbl-mariner/busybox:2.0", config.PrivateACRName) - } + image := getBaseImageName(isAirgap) return fmt.Sprintf(`apiVersion: v1 kind: Pod @@ -572,6 +569,14 @@ spec: `, podName, image, nodeName) } +func getBaseImageName(isAirgap bool) string { + image := "mcr.microsoft.com/cbl-mariner/busybox:2.0" + if isAirgap { + image = fmt.Sprintf("%s.azurecr.io/aks/cbl-mariner/busybox:2.0", config.PrivateACRName) + } + return image +} + func getWasmSpinPodTemplate(podName, nodeName string) string { return fmt.Sprintf(`apiVersion: v1 kind: Pod @@ -600,3 +605,27 @@ spec: kubernetes.io/hostname: %s `, podName, nodeName) } + +func getSecurityContextPodTemplate(isAirgap bool, nodeName string, podName string) string { + image := getBaseImageName(isAirgap) + return fmt.Sprintf(`apiVersion: v1 +kind: Pod +metadata: + name: %s +spec: + containers: + - name: default-container + image: %s + imagePullPolicy: IfNotPresent + command: ['sh', '-c', 'echo "start pod without securityContext!" && sleep 3600'] + - name: runtime-default-container + image: %s + imagePullPolicy: IfNotPresent + command: ['sh', '-c', 'echo "start runtime default pod!" 
&& sleep 3600'] + securityContext: + seccompProfile: + type: RuntimeDefault + nodeSelector: + kubernetes.io/hostname: %s +`, podName, image, image, nodeName) +} diff --git a/e2e/types.go b/e2e/types.go index 8b33a51d42e..00872e013e9 100644 --- a/e2e/types.go +++ b/e2e/types.go @@ -169,6 +169,9 @@ type LiveVMValidator struct { // IsPodNetwork is a boolean flags which indicates whether or not the validator should run on a pod that is NOT using // host's network interface. For example when testing connectivity from user pods to certain endpoints, we will set it to true IsPodNetwork bool + + // IsPrivileged is a boolean flag which indicates whether or not the command should be run as a privileged user + IsPrivileged bool } func (s *Scenario) PrepareAKSNodeConfig() { diff --git a/e2e/validation.go b/e2e/validation.go index 58d26d48cca..2a2fe4c2225 100644 --- a/e2e/validation.go +++ b/e2e/validation.go @@ -56,7 +56,11 @@ func runLiveVMValidators(ctx context.Context, t *testing.T, vmssName, privateIP, var err error // Non Host Validators - meaning we want to execute checks through a pod which is NOT connected to host's network if validator.IsPodNetwork { - execResult, err = execOnUnprivilegedPod(ctx, scenario.Runtime.Cluster.Kube, "default", nonHostPodName, validator.Command) + if validator.IsPrivileged { + execResult, err = execOnPrivilegedPod(ctx, scenario.Runtime.Cluster.Kube, "default", nonHostPodName, validator.Command) + } else { + execResult, err = execOnUnprivilegedPod(ctx, scenario.Runtime.Cluster.Kube, "default", nonHostPodName, validator.Command) + } } else { execResult, err = execOnVM(ctx, scenario.Runtime.Cluster.Kube, privateIP, hostPodName, sshPrivateKey, validator.Command, validator.IsShellBuiltIn) } @@ -185,3 +189,34 @@ func leakedSecretsValidators(scenario *Scenario) []*LiveVMValidator { } return validators } + +func createCustomKubeletConfigDebugPod(ctx context.Context, t *testing.T, kube *Kubeclient, nodeName string, isAirgap bool) { + testPodName := 
// isProperSubset reports whether every element of subset also occurs in
// superset. Despite the name, the comparison is not strict: identical sets and
// an empty subset both yield true. Duplicate entries are ignored.
func isProperSubset(subset []string, superset []string) bool {
	known := make(map[string]struct{}, len(superset))
	for i := range superset {
		known[superset[i]] = struct{}{}
	}
	for i := range subset {
		if _, found := known[subset[i]]; !found {
			return false
		}
	}
	return true
}

// SimpleSeccompProfile mirrors the part of a seccomp profile JSON document
// (such as base.json) that the e2e validators inspect: the list of syscall
// rules. All other keys of the document are ignored when decoding.
type SimpleSeccompProfile struct {
	Syscalls []Syscall `json:"syscalls"`
}

// Syscall is a single seccomp rule: a group of syscall names together with the
// action applied to them. Argument filters ("args") are not decoded.
type Syscall struct {
	Names  []string `json:"names"`
	Action string   `json:"action"`
}
Command: command, Asserter: func(code, stdout, stderr string) error { @@ -426,3 +428,40 @@ func KubeletHasConfigFlagsValidator(filePath string) *LiveVMValidator { }, } } + +func SeccompProfileValidator(profileFilePath string, defaultProfileContainerName string) *LiveVMValidator { + return &LiveVMValidator{ + Description: fmt.Sprintf("assert default seccomp profile for type %s does not change", profileFilePath), + Command: fmt.Sprintf("'crictl inspect $(crictl ps -q --name=%s) | jq \".info.runtimeSpec.linux.seccomp\"'", defaultProfileContainerName), + IsPodNetwork: true, + IsPrivileged: true, + Asserter: func(code, stdout, stderr string) error { + baseProfileFile, err := os.ReadFile("containerd/seccomp_default_profile/" + profileFilePath + ".json") + if err != nil { + return fmt.Errorf("could not read base seccomp file json: %w", err) + } + expected := SimpleSeccompProfile{} + if err = json.Unmarshal(baseProfileFile, &expected); err != nil { + return fmt.Errorf(fmt.Sprintf("expected to find flag %s, but not found: %s", "config", stdout)) + } + // loop through the syscalls and add them to the map[action]=names + expectedSyscallsConsolidated := make(map[string][]string) + for _, syscall := range expected.Syscalls { + expectedSyscallsConsolidated[syscall.Action] = append(syscall.Names, expectedSyscallsConsolidated[syscall.Action]...) + } + actual := SimpleSeccompProfile{} + json.Unmarshal([]byte(stdout), &actual) + actualSyscallsConsolidated := make(map[string][]string) + for _, syscall := range actual.Syscalls { + actualSyscallsConsolidated[syscall.Action] = append(syscall.Names, actualSyscallsConsolidated[syscall.Action]...) + } + // assert current values are still there + for action, expectedNames := range expectedSyscallsConsolidated { + if !isProperSubset(actualSyscallsConsolidated[action], expectedNames) { + return fmt.Errorf("expected syscall %s with action %s not found in actual profile", expectedNames, action) + } + } + return nil + }, + } +}