
Commit

Merge branch 'upstream-add-alternate-device-list' into 'master'
Add the ability to pull the device list from mounted files instead of just environment variables

See merge request nvidia/container-toolkit/container-toolkit!15
klueska committed Jul 24, 2020
2 parents 2ea3150 + 32b4b09 commit 4448319
Showing 11 changed files with 288 additions and 192 deletions.
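
This change lets the hook read the requested device list from the container's mount table as well as from NVIDIA_VISIBLE_DEVICES: a mount of /dev/null onto a path under the directory configured by look-for-nvidia-visible-devices-as-volume-mounts-under (shown as /var/run/nvidia-container-devices in the config files below) is treated as a request for the device named by the rest of that path. The snippet below is only an illustrative sketch of that path convention, with made-up device names; the hook's actual parsing is getDevicesFromMounts in pkg/container_config.go further down.

package main

import (
    "fmt"
    "path/filepath"
    "strings"
)

func main() {
    // Directory under which /dev/null mounts are interpreted as device requests.
    root := "/var/run/nvidia-container-devices"

    // Hypothetical container mount destinations (the sources would all be /dev/null).
    destinations := []string{
        "/var/run/nvidia-container-devices/0",            // request GPU index 0
        "/var/run/nvidia-container-devices/GPU-fake-uuid", // request a GPU by (made-up) UUID
        "/tmp/unrelated",                                  // not under root, so not a device request
    }

    var devices []string
    for _, d := range destinations {
        rel, err := filepath.Rel(root, d)
        if err != nil || strings.HasPrefix(rel, "..") {
            continue // ignore paths outside the configured directory
        }
        devices = append(devices, rel)
    }
    fmt.Println(strings.Join(devices, ",")) // prints: 0,GPU-fake-uuid
}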
config/config.toml.amzn: 2 additions, 1 deletion
@@ -1,5 +1,7 @@
 disable-require = false
 #swarm-resource = "DOCKER_RESOURCE_GPU"
+#accept-nvidia-visible-devices-envvar-when-unprivileged = true
+#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
 
 [nvidia-container-cli]
 #root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
 #no-cgroups = false
 #user = "root:video"
 ldconfig = "@/sbin/ldconfig"
-#alpha-merge-visible-devices-envvars = false
 
 [nvidia-container-runtime]
 #debug = "/var/log/nvidia-container-runtime.log"
config/config.toml.centos: 2 additions, 1 deletion
@@ -1,5 +1,7 @@
 disable-require = false
 #swarm-resource = "DOCKER_RESOURCE_GPU"
+#accept-nvidia-visible-devices-envvar-when-unprivileged = true
+#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
 
 [nvidia-container-cli]
 #root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
 #no-cgroups = false
 #user = "root:video"
 ldconfig = "@/sbin/ldconfig"
-#alpha-merge-visible-devices-envvars = false
 
 [nvidia-container-runtime]
 #debug = "/var/log/nvidia-container-runtime.log"
config/config.toml.debian: 2 additions, 1 deletion
@@ -1,5 +1,7 @@
 disable-require = false
 #swarm-resource = "DOCKER_RESOURCE_GPU"
+#accept-nvidia-visible-devices-envvar-when-unprivileged = true
+#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
 
 [nvidia-container-cli]
 #root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
 #no-cgroups = false
 #user = "root:video"
 ldconfig = "@/sbin/ldconfig"
-#alpha-merge-visible-devices-envvars = false
 
 [nvidia-container-runtime]
 #debug = "/var/log/nvidia-container-runtime.log"
config/config.toml.opensuse-leap: 2 additions, 1 deletion
@@ -1,5 +1,7 @@
 disable-require = false
 #swarm-resource = "DOCKER_RESOURCE_GPU"
+#accept-nvidia-visible-devices-envvar-when-unprivileged = true
+#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
 
 [nvidia-container-cli]
 #root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
 #no-cgroups = false
 user = "root:video"
 ldconfig = "@/sbin/ldconfig"
-#alpha-merge-visible-devices-envvars = false
 
 [nvidia-container-runtime]
 #debug = "/var/log/nvidia-container-runtime.log"
config/config.toml.ubuntu: 2 additions, 1 deletion
@@ -1,5 +1,7 @@
 disable-require = false
 #swarm-resource = "DOCKER_RESOURCE_GPU"
+#accept-nvidia-visible-devices-envvar-when-unprivileged = true
+#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
 
 [nvidia-container-cli]
 #root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
 #no-cgroups = false
 #user = "root:video"
 ldconfig = "@/sbin/ldconfig.real"
-#alpha-merge-visible-devices-envvars = false
 
 [nvidia-container-runtime]
 #debug = "/var/log/nvidia-container-runtime.log"
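
Both new keys are commented out in every config flavour. accept-nvidia-visible-devices-envvar-when-unprivileged controls whether NVIDIA_VISIBLE_DEVICES is still honoured for unprivileged containers, and look-for-nvidia-visible-devices-as-volume-mounts-under sets the directory under which /dev/null volume mounts are interpreted as device requests. In the hook they surface as the HookConfig fields consumed by getDevices in pkg/container_config.go below (AcceptEnvvarUnprivileged and DeviceListVolumeMount). The HookConfig definition itself is not part of this diff, so the following is only a rough sketch of the shape those fields take:

package main

// Rough sketch only: the real HookConfig lives outside this diff and carries
// more fields. The names and types here follow how getDevices uses them.
type hookConfigSketch struct {
    // accept-nvidia-visible-devices-envvar-when-unprivileged
    AcceptEnvvarUnprivileged bool

    // look-for-nvidia-visible-devices-as-volume-mounts-under
    DeviceListVolumeMount *string
}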
container_config_test.go: 0 additions, 131 deletions

This file was deleted.

pkg/Godeps/Godeps.json: 0 additions, 12 deletions

This file was deleted.

Binary file removed pkg/container-toolkit
pkg/container_config.go: 76 additions, 17 deletions
@@ -6,6 +6,7 @@ import (
     "log"
     "os"
     "path"
+    "path/filepath"
     "strconv"
     "strings"
 
@@ -73,13 +74,23 @@ type LinuxCapabilities struct {
     Ambient []string `json:"ambient,omitempty" platform:"linux"`
 }
 
+// Mount from OCI runtime spec
+// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
+type Mount struct {
+    Destination string   `json:"destination"`
+    Type        string   `json:"type,omitempty" platform:"linux,solaris"`
+    Source      string   `json:"source,omitempty"`
+    Options     []string `json:"options,omitempty"`
+}
+
 // Spec from OCI runtime spec
 // We use pointers to structs, similarly to the latest version of runtime-spec:
 // https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
 type Spec struct {
     Version *string  `json:"ociVersion"`
     Process *Process `json:"process,omitempty"`
     Root    *Root    `json:"root,omitempty"`
+    Mounts  []Mount  `json:"mounts,omitempty"`
 }
 
 // HookState holds state information about the hook
@@ -108,7 +119,7 @@ func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
     return
 }
 
-func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
+func getEnvMap(e []string) (m map[string]string) {
     m = make(map[string]string)
     for _, s := range e {
         p := strings.SplitN(s, "=", 2)
@@ -117,17 +128,6 @@ func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
         }
         m[p[0]] = p[1]
     }
-    if config.AlphaMergeVisibleDevicesEnvvars {
-        var mergable []string
-        for k, v := range m {
-            if strings.HasPrefix(k, envNVVisibleDevices+"_") {
-                mergable = append(mergable, v)
-            }
-        }
-        if len(mergable) > 0 {
-            m[envNVVisibleDevices] = strings.Join(mergable, ",")
-        }
-    }
     return
 }
 
@@ -198,7 +198,7 @@ func isLegacyCUDAImage(env map[string]string) bool {
     return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
 }
 
-func getDevices(env map[string]string, legacyImage bool) *string {
+func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
     // Build a list of envvars to consider.
     envVars := []string{envNVVisibleDevices}
     if envSwarmGPU != nil {
@@ -236,6 +236,65 @@
     return devices
 }
 
+func getDevicesFromMounts(root string, mounts []Mount) *string {
+    var devices []string
+    for _, m := range mounts {
+        root := filepath.Clean(root)
+        source := filepath.Clean(m.Source)
+        destination := filepath.Clean(m.Destination)
+
+        // Only consider mounts whose host volume is /dev/null
+        if source != "/dev/null" {
+            continue
+        }
+        // Only consider container mount points that begin with 'root'
+        if len(destination) < len(root) {
+            continue
+        }
+        if destination[:len(root)] != root {
+            continue
+        }
+        // Grab the full path beyond 'root' and add it to the list of devices
+        device := destination[len(root):]
+        if len(device) > 0 && device[0] == '/' {
+            device = device[1:]
+        }
+        if len(device) == 0 {
+            continue
+        }
+        devices = append(devices, device)
+    }
+
+    if devices == nil {
+        return nil
+    }
+
+    ret := strings.Join(devices, ",")
+    return &ret
+}
+
+func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
+    // Try and get the device list from mount volumes first
+    devices := getDevicesFromMounts(*hookConfig.DeviceListVolumeMount, mounts)
+    if devices != nil {
+        return devices
+    }
+
+    // Fallback to reading from the environment variable if privileges are correct
+    devices = getDevicesFromEnvvar(env, legacyImage)
+    if devices == nil {
+        return nil
+    }
+    if privileged || hookConfig.AcceptEnvvarUnprivileged {
+        return devices
+    }
+
+    // Error out otherwise
+    log.Panicln("insufficient privileges to read device list from NVIDIA_VISIBLE_DEVICES envvar")
+
+    return nil
+}
+
 func getMigConfigDevices(env map[string]string) *string {
     if devices, ok := env[envNVMigConfigDevices]; ok {
         return &devices
@@ -296,11 +355,11 @@ func getRequirements(env map[string]string, legacyImage bool) []string {
     return requirements
 }
 
-func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig {
+func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
     legacyImage := isLegacyCUDAImage(env)
 
     var devices string
-    if d := getDevices(env, legacyImage); d != nil {
+    if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
         devices = *d
     } else {
         // 'nil' devices means this is not a GPU container.
@@ -357,13 +416,13 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
 
     s := loadSpec(path.Join(b, "config.json"))
 
-    env := getEnvMap(s.Process.Env, hook.NvidiaContainerCLI)
+    env := getEnvMap(s.Process.Env)
     privileged := isPrivileged(s)
     envSwarmGPU = hook.SwarmResource
     return containerConfig{
         Pid:    h.Pid,
         Rootfs: s.Root.Path,
         Env:    env,
-        Nvidia: getNvidiaConfig(env, privileged),
+        Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
     }
 }
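
To make the new lookup concrete, here is a hypothetical test-style sketch (not part of this commit) that could sit alongside pkg/container_config.go in the same package. It assumes getDevicesFromEnvvar passes a non-legacy NVIDIA_VISIBLE_DEVICES value such as "all" through unchanged (its body is elided in this diff) and sets only the two HookConfig fields that getDevices reads.

package main // assumed to match the package of pkg/container_config.go

import "testing"

// Hypothetical sketch: only mounts whose source is /dev/null and whose
// destination falls under the configured root are treated as device requests.
func TestGetDevicesFromMountsSketch(t *testing.T) {
    root := "/var/run/nvidia-container-devices"
    mounts := []Mount{
        {Source: "/dev/null", Destination: root + "/GPU-fake-uuid"}, // request by (made-up) UUID
        {Source: "/dev/null", Destination: root + "/1"},             // request by index
        {Source: "/dev/null", Destination: "/elsewhere/2"},          // outside root, ignored
        {Source: "/dev/zero", Destination: root + "/3"},             // wrong source, ignored
    }

    got := getDevicesFromMounts(root, mounts)
    if got == nil {
        t.Fatal("expected a device list, got nil")
    }
    if *got != "GPU-fake-uuid,1" {
        t.Fatalf("unexpected device list: %q", *got)
    }
}

// Hypothetical sketch: the mount-derived list takes precedence over
// NVIDIA_VISIBLE_DEVICES, and the envvar is accepted for this unprivileged
// container only because AcceptEnvvarUnprivileged is set.
func TestGetDevicesPrecedenceSketch(t *testing.T) {
    root := "/var/run/nvidia-container-devices"
    cfg := &HookConfig{
        AcceptEnvvarUnprivileged: true,
        DeviceListVolumeMount:    &root,
    }
    env := map[string]string{envNVVisibleDevices: "all"}
    mounts := []Mount{{Source: "/dev/null", Destination: root + "/0"}}

    // Device mounts win even though the envvar is set.
    if got := getDevices(cfg, env, mounts, false, false); got == nil || *got != "0" {
        t.Fatal("expected the device list to come from the mounts")
    }

    // With no device mounts, the envvar is used instead.
    if got := getDevices(cfg, env, nil, false, false); got == nil || *got != "all" {
        t.Fatal("expected the device list to come from NVIDIA_VISIBLE_DEVICES")
    }
}

In short, the order getDevices applies is: volume mounts first, then NVIDIA_VISIBLE_DEVICES, and the envvar is honoured only for privileged containers or when accept-nvidia-visible-devices-envvar-when-unprivileged is enabled; otherwise the hook panics rather than silently exposing devices.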
