Skip to content

Commit

Permalink
Create .so symlinks for driver libraries in container
Browse files Browse the repository at this point in the history
This change adds an opt-in feature for creating .so symlinks to
all injected driver files in a container.

If features.dot-so-symlinks = true is set in the config.toml, the creation
of symlinks for driver files is enabled. This can also be triggered on a
per-container basis using the envvar NVIDIA_DOT_SO_SYMLINKS=enabled.

Signed-off-by: Evan Lezar <[email protected]>
  • Loading branch information
elezar committed Apr 3, 2024
1 parent 26e52b8 commit baa5096
Show file tree
Hide file tree
Showing 12 changed files with 252 additions and 25 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# NVIDIA Container Toolkit Changelog

* Add a hook to create `.so` symlinks for driver libraries in a container.

## v1.15.0-rc.4
* Add a `--spec-dir` option to the `nvidia-ctk cdi generate` command. This allows specs outside of `/etc/cdi` and `/var/run/cdi` to be processed.
* Add support for extracting device major number from `/proc/devices` if `nvidia` is used as a device name over `nvidia-frontend`.
Expand Down
114 changes: 114 additions & 0 deletions cmd/nvidia-ctk/hook/create-dot-so-symlinks/create-dot-so-symlinks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package dotsosymlinks

import (
"fmt"
"os"
"path/filepath"
"strings"

"github.com/urfave/cli/v2"

"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)

// command implements the create-dot-so-symlinks hook subcommand.
type command struct {
// logger is the logger supplied to NewCommand.
logger logger.Interface
}

// config holds the values bound to the command line flags of the
// create-dot-so-symlinks command: containerSpec is the path to the OCI
// container spec and driverVersion is the driver version for which symlinks
// are created.
type config struct {
	containerSpec, driverVersion string
}

// NewCommand returns the create-dot-so-symlinks hook command configured with
// the specified logger.
func NewCommand(logger logger.Interface) *cli.Command {
	return command{logger: logger}.build()
}

// build creates the cli.Command for the create-dot-so-symlinks hook. The flag
// values are bound to a config that is handed to run when the command is
// invoked.
func (m command) build() *cli.Command {
	cfg := config{}

	// Create the 'create-dot-so-symlinks' command
	c := cli.Command{
		Name:  "create-dot-so-symlinks",
		Usage: "A hook to create .so symlinks for versioned driver libraries in the container",
		Action: func(ctx *cli.Context) error {
			return m.run(ctx, &cfg)
		},
	}

	c.Flags = []cli.Flag{
		&cli.StringFlag{
			Name:        "container-spec",
			Usage:       "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
			Destination: &cfg.containerSpec,
		},
		&cli.StringFlag{
			Name:        "driver-version",
			Usage:       "specify the driver version for which the symlinks are to be created. This assumes driver libraries have the .so.`VERSION` suffix.",
			Destination: &cfg.driverVersion,
			Required:    true,
		},
	}

	return &c
}

// run creates unversioned .so symlinks for driver libraries in the container.
//
// The container state is loaded from cfg.containerSpec and used to determine
// the container root. For every library under that root matching
// *.so.<driverVersion>, a symlink named <lib>.so is created next to it,
// pointing (by base name) at the matching <lib>.so.N file, provided exactly
// one such single-digit file exists.
//
// Failing to load the container state, determine the container root, or
// locate libraries is returned as an error. Symlink creation itself is
// best-effort: a library that cannot be processed is logged and skipped.
func (m command) run(c *cli.Context, cfg *config) error {
	s, err := oci.LoadContainerState(cfg.containerSpec)
	if err != nil {
		return fmt.Errorf("failed to load container state: %w", err)
	}

	containerRoot, err := s.GetContainerRoot()
	if err != nil {
		return fmt.Errorf("failed to determine container root: %w", err)
	}

	libs, err := lookup.NewLibraryLocator(
		lookup.WithLogger(m.logger),
		lookup.WithRoot(containerRoot),
		lookup.WithOptional(true),
	).Locate("*.so." + cfg.driverVersion)
	if err != nil {
		return fmt.Errorf("failed to locate libraries for driver version %v: %w", cfg.driverVersion, err)
	}

	versionedSuffix := ".so." + cfg.driverVersion
	for _, lib := range libs {
		if !strings.HasSuffix(lib, versionedSuffix) {
			continue
		}
		// Strip only the version so that libSoPath still ends in ".so".
		libSoPath := strings.TrimSuffix(lib, "."+cfg.driverVersion)
		libSoXPaths, err := filepath.Glob(libSoPath + ".[0-9]")
		if err != nil {
			m.logger.Warningf("Skipping %v: failed to search for %v.[0-9]: %v", lib, libSoPath, err)
			continue
		}
		if len(libSoXPaths) != 1 {
			// Without exactly one .so.N candidate the link target is
			// either missing or ambiguous.
			m.logger.Debugf("Skipping %v: found %d candidates for %v.[0-9]", lib, len(libSoXPaths), libSoPath)
			continue
		}
		if err := os.Symlink(filepath.Base(libSoXPaths[0]), libSoPath); err != nil {
			if os.IsExist(err) {
				// A file or link is already present at libSoPath; leave it untouched.
				continue
			}
			m.logger.Warningf("Failed to create symlink %v -> %v: %v", libSoPath, filepath.Base(libSoXPaths[0]), err)
		}
	}
	return nil
}
2 changes: 2 additions & 0 deletions cmd/nvidia-ctk/hook/hook.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/urfave/cli/v2"

createdotsosymlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-dot-so-symlinks"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
)
Expand Down Expand Up @@ -50,6 +51,7 @@ func (m hookCommand) build() *cli.Command {
ldcache.NewCommand(m.logger),
symlinks.NewCommand(m.logger),
chmod.NewCommand(m.logger),
createdotsosymlinks.NewCommand(m.logger),
}

return &hook
Expand Down
29 changes: 20 additions & 9 deletions internal/config/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,15 @@ package config
type featureName string

const (
FeatureGDS = featureName("gds")
FeatureMOFED = featureName("mofed")
FeatureNVSWITCH = featureName("nvswitch")
FeatureGDRCopy = featureName("gdrcopy")
FeatureGDS = featureName("gds")
FeatureMOFED = featureName("mofed")
FeatureNVSWITCH = featureName("nvswitch")
FeatureGDRCopy = featureName("gdrcopy")
FeatureDotSoSymlinks = featureName("dot-so-symlinks")

// featureNotControlledByEnvvar is used for features that have no envvar to
// allow per-container opt-in.
featureNotControlledByEnvvar = ""
)

// features specifies a set of named features.
Expand All @@ -31,6 +36,9 @@ type features struct {
MOFED *feature `toml:"mofed,omitempty"`
NVSWITCH *feature `toml:"nvswitch,omitempty"`
GDRCopy *feature `toml:"gdrcopy,omitempty"`
// DotSoSymlinks allows for the creation of .so symlinks to .so.1 driver
// files to be opted in to.
DotSoSymlinks *feature `toml:"dot-so-symlinks,omitempty"`
}

type feature bool
Expand All @@ -40,10 +48,11 @@ type feature bool
// variables can also be supplied.
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
featureEnvvars := map[featureName]string{
FeatureGDS: "NVIDIA_GDS",
FeatureMOFED: "NVIDIA_MOFED",
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
FeatureGDRCopy: "NVIDIA_GDRCOPY",
FeatureGDS: "NVIDIA_GDS",
FeatureMOFED: "NVIDIA_MOFED",
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
FeatureGDRCopy: "NVIDIA_GDRCOPY",
FeatureDotSoSymlinks: "NVIDIA_DOT_SO_SYMLINKS",
}

envvar := featureEnvvars[n]
Expand All @@ -56,6 +65,8 @@ func (fs features) IsEnabled(n featureName, in ...getenver) bool {
return fs.NVSWITCH.isEnabled(envvar, in...)
case FeatureGDRCopy:
return fs.GDRCopy.isEnabled(envvar, in...)
case FeatureDotSoSymlinks:
return fs.DotSoSymlinks.isEnabled(envvar, in...)
default:
return false
}
Expand All @@ -69,7 +80,7 @@ func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
if f != nil {
return bool(*f)
}
if envvar == "" {
if envvar == featureNotControlledByEnvvar {
return false
}
for _, in := range ins {
Expand Down
27 changes: 27 additions & 0 deletions internal/discover/dot_so_symlinks.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package discover

// NewDotSoSymlinksDiscoverer returns a discoverer for a hook that invokes the
// create-dot-so-symlinks command of the nvidia-ctk binary at nvidiaCTKPath,
// passing the specified driver version.
func NewDotSoSymlinksDiscoverer(nvidiaCTKPath string, version string) Discover {
	const hookName = "create-dot-so-symlinks"
	return CreateNvidiaCTKHook(nvidiaCTKPath, hookName, "--driver-version", version)
}
15 changes: 5 additions & 10 deletions internal/discover/graphics.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/info/proc"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
)

Expand Down Expand Up @@ -256,20 +255,16 @@ func optionalXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidia
}

func newXorgDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCTKPath string) (Discover, error) {
libCudaPaths, err := cuda.New(
driver.Libraries(),
).Locate(".*.*")
libRoot, err := driver.LibraryRoot()
if err != nil {
return nil, fmt.Errorf("failed to locate libcuda.so: %v", err)
return nil, fmt.Errorf("failed to determine driver library root: %w", err)
}
libcudaPath := libCudaPaths[0]

version := strings.TrimPrefix(filepath.Base(libcudaPath), "libcuda.so.")
if version == "" {
return nil, fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
version, err := driver.Version()
if err != nil {
return nil, fmt.Errorf("failed to determine driver version: %w", err)
}

libRoot := filepath.Dir(libcudaPath)
xorgLibs := NewMounts(
logger,
lookup.NewFileLocator(
Expand Down
6 changes: 6 additions & 0 deletions internal/lookup/root/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,9 @@ func WithLibrarySearchPaths(paths ...string) Option {
d.librarySearchPaths = paths
}
}

// WithVersion returns an Option that sets the version of the Driver to the
// specified value.
func WithVersion(version string) Option {
	setVersion := func(d *Driver) {
		d.version = version
	}
	return setVersion
}
53 changes: 53 additions & 0 deletions internal/lookup/root/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,19 +17,28 @@
package root

import (
"fmt"
"path/filepath"
"strings"
"sync"

"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
)

// Driver represents a filesystem in which a set of drivers or devices is defined.
type Driver struct {
// The embedded Mutex is held by Version and LibraryRoot while they read or
// populate the cached version and libraryRoot fields.
sync.Mutex
logger logger.Interface
// Root represents the root from the perspective of the driver libraries and binaries.
Root string
// librarySearchPaths specifies explicit search paths for discovering libraries.
librarySearchPaths []string
// version stores the driver version. It is either set at construction using
// WithVersion or cached by the first successful call to Version.
version string
// libraryRoot stores the directory in which the driver libraries
// (libcuda.so.<VERSION>) were found. It is cached by the first successful
// call to LibraryRoot.
libraryRoot string
}

// New creates a new Driver root using the specified options.
Expand All @@ -53,6 +62,50 @@ func (r *Driver) Libraries() lookup.Locator {
)
}

// Version returns the driver version as a string.
//
// If a version was set at construction or determined by an earlier call, that
// value is returned. Otherwise the version is taken from the filename of the
// first libcuda.so.<VERSION> path located in the driver libraries and cached
// for subsequent calls.
//
// An error is returned if libcuda.so cannot be located or if the located
// filename does not have the form libcuda.so.<VERSION>.
func (r *Driver) Version() (string, error) {
	r.Lock()
	defer r.Unlock()
	if r.version != "" {
		return r.version, nil
	}

	libCudaPaths, err := cuda.New(
		r.Libraries(),
	).Locate(".*.*")
	if err != nil {
		return "", fmt.Errorf("failed to locate libcuda.so: %w", err)
	}
	// Guard against a locator that reports success without any candidates.
	if len(libCudaPaths) == 0 {
		return "", fmt.Errorf("failed to locate libcuda.so: no candidates found")
	}
	libcudaPath := libCudaPaths[0]

	const prefix = "libcuda.so."
	name := filepath.Base(libcudaPath)
	// TrimPrefix returns its input unchanged when the prefix is absent, so
	// the prefix is checked explicitly to avoid treating an unrelated
	// filename as the version.
	version := strings.TrimPrefix(name, prefix)
	if !strings.HasPrefix(name, prefix) || version == "" {
		return "", fmt.Errorf("failed to determine libcuda.so version from path: %q", libcudaPath)
	}

	r.version = version
	return r.version, nil
}

// LibraryRoot returns the folder in which the driver libraries can be found.
//
// The folder is the directory of the first libcuda.so.<VERSION> path located
// in the driver libraries. The result is cached, so later calls return the
// stored value without searching again.
//
// An error is returned if libcuda.so cannot be located.
func (r *Driver) LibraryRoot() (string, error) {
	r.Lock()
	defer r.Unlock()
	if r.libraryRoot != "" {
		return r.libraryRoot, nil
	}

	libCudaPaths, err := cuda.New(
		r.Libraries(),
	).Locate(".*.*")
	if err != nil {
		return "", fmt.Errorf("failed to locate libcuda.so: %w", err)
	}
	// Guard against a locator that reports success without any candidates.
	if len(libCudaPaths) == 0 {
		return "", fmt.Errorf("failed to locate libcuda.so: no candidates found")
	}

	r.libraryRoot = filepath.Dir(libCudaPaths[0])
	return r.libraryRoot, nil
}

// normalizeSearchPaths takes a list of paths and normalizes these.
// Each of the elements in the list is expanded if it is a path list and the
// resultant list is returned.
Expand Down
Loading

0 comments on commit baa5096

Please sign in to comment.