Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate CDI specification including additional GIDs #630

Open
wants to merge 3 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions cmd/nvidia-ctk/cdi/generate/generate.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,11 @@ type options struct {
files cli.StringSlice
ignorePatterns cli.StringSlice
}

// spec represents the CDI spec options.
spec struct {
allowAdditionalGIDs bool
}
}

// NewCommand constructs a generate-cdi command with the specified logger
Expand Down Expand Up @@ -169,6 +174,11 @@ func (m command) build() *cli.Command {
Usage: "Specify a pattern the CSV mount specifications.",
Destination: &opts.csv.ignorePatterns,
},
&cli.BoolFlag{
Name: "--allow-additional-gids",
Usage: "Allow the use of the additionalGIDs field for generated CDI specifications. Note this will generate a v0.7.0 CDI specification.",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Which versions of containerd / cri-o (and other runtimes for that matter) support this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would have to check. Note that this is not enabled by default when generating the CDI spec and only for internal representations.

Copy link
Member Author

@elezar elezar Aug 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Destination: &opts.spec.allowAdditionalGIDs,
},
}

return &c
Expand Down Expand Up @@ -273,6 +283,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
nvcdi.WithLibrarySearchPaths(opts.librarySearchPaths.Value()),
nvcdi.WithCSVFiles(opts.csv.files.Value()),
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
nvcdi.WithAllowAdditionalGIDs(opts.spec.allowAdditionalGIDs),
)
if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err)
Expand Down
49 changes: 31 additions & 18 deletions internal/config/features.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,17 @@
package config

type featureName string
type feature bool

const (
FeatureGDS = featureName("gds")
FeatureMOFED = featureName("mofed")
FeatureNVSWITCH = featureName("nvswitch")
FeatureGDRCopy = featureName("gdrcopy")
FeatureAllowAdditionalGIDs = featureName("allow-additional-gids")
FeatureGDRCopy = featureName("gdrcopy")
FeatureGDS = featureName("gds")
FeatureMOFED = featureName("mofed")
FeatureNVSWITCH = featureName("nvswitch")

featureEnabled feature = true
featureDisabled feature = false
)

// features specifies a set of named features.
Expand All @@ -31,19 +36,23 @@ type features struct {
MOFED *feature `toml:"mofed,omitempty"`
NVSWITCH *feature `toml:"nvswitch,omitempty"`
GDRCopy *feature `toml:"gdrcopy,omitempty"`
// AllowAdditionalGIDs triggers the additionalGIDs field in internal CDI
// specifications to be populated if required. This can be useful when
// running the container as a user id that may not have access to device
// nodes.
AllowAdditionalGIDs *feature `toml:"allow-additional-gids,omitempty"`
}

type feature bool

// IsEnabled checks whether a specified named feature is enabled.
// An optional list of environments to check for feature-specific environment
// variables can also be supplied.
func (fs features) IsEnabled(n featureName, in ...getenver) bool {
featureEnvvars := map[featureName]string{
FeatureGDS: "NVIDIA_GDS",
FeatureMOFED: "NVIDIA_MOFED",
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
FeatureGDRCopy: "NVIDIA_GDRCOPY",
FeatureGDS: "NVIDIA_GDS",
FeatureMOFED: "NVIDIA_MOFED",
FeatureNVSWITCH: "NVIDIA_NVSWITCH",
FeatureGDRCopy: "NVIDIA_GDRCOPY",
FeatureAllowAdditionalGIDs: "NVIDIA_ALLOW_ADDITIONAL_GIDS",
}

envvar := featureEnvvars[n]
Expand All @@ -56,6 +65,8 @@ func (fs features) IsEnabled(n featureName, in ...getenver) bool {
return fs.NVSWITCH.isEnabled(envvar, in...)
case FeatureGDRCopy:
return fs.GDRCopy.isEnabled(envvar, in...)
case FeatureAllowAdditionalGIDs:
return fs.AllowAdditionalGIDs.isEnabled(envvar, in...)
default:
return false
}
Expand All @@ -66,17 +77,19 @@ func (fs features) IsEnabled(n featureName, in ...getenver) bool {
// associated envvar is checked in the specified getenver for the string "enabled"
// A CUDA container / image can be passed here.
func (f *feature) isEnabled(envvar string, ins ...getenver) bool {
if envvar != "" {
for _, in := range ins {
switch in.Getenv(envvar) {
case "enabled", "true":
return true
case "disabled", "false":
return false
}
}
}
if f != nil {
return bool(*f)
}
if envvar == "" {
return false
}
for _, in := range ins {
if in.Getenv(envvar) == "enabled" {
return true
}
}
return false
}

Expand Down
150 changes: 150 additions & 0 deletions internal/config/features_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package config

import (
"testing"

"github.com/stretchr/testify/require"
)

func TestFeatures(t *testing.T) {
testCases := []struct {
description string
features features
expected map[featureName]bool
envs []getenver
}{
{
description: "empty features",
expected: map[featureName]bool{
FeatureAllowAdditionalGIDs: false,
FeatureGDRCopy: false,
FeatureGDS: false,
FeatureMOFED: false,
FeatureNVSWITCH: false,
},
},
{
description: "envvar sets features if enabled",
expected: map[featureName]bool{
FeatureAllowAdditionalGIDs: true,
FeatureGDRCopy: false,
FeatureGDS: false,
FeatureMOFED: false,
FeatureNVSWITCH: false,
},
envs: []getenver{
mockEnver{
"NVIDIA_ALLOW_ADDITIONAL_GIDS": "enabled",
},
},
},
{
description: "envvar sets features if true",
expected: map[featureName]bool{
FeatureAllowAdditionalGIDs: true,
FeatureGDRCopy: false,
FeatureGDS: false,
FeatureMOFED: false,
FeatureNVSWITCH: false,
},
envs: []getenver{
mockEnver{
"NVIDIA_ALLOW_ADDITIONAL_GIDS": "true",
},
},
},
{
description: "feature sets feature",
features: features{
AllowAdditionalGIDs: ptr(featureEnabled),
},
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
for n, v := range tc.expected {
t.Run(tc.description+"-"+string(n), func(t *testing.T) {
require.EqualValues(t, v, tc.features.IsEnabled(n, tc.envs...))
})
}

})
}
}

func TestFeature(t *testing.T) {
testCases := []struct {
description string
feature *feature
envvar string
envs []getenver
expected bool
}{
{
description: "nil feature is false",
feature: nil,
expected: false,
},
{
description: "feature enables",
feature: ptr(featureEnabled),
expected: true,
},
{
description: "feature disabled",
feature: ptr(featureDisabled),
expected: false,
},
{
description: "envvar overrides feature disabled",
feature: ptr(featureDisabled),
envvar: "FEATURE",
envs: []getenver{
mockEnver{"FEATURE": "enabled"},
},
expected: true,
},
{
description: "envvar overrides feature enabled",
feature: ptr(featureEnabled),
envvar: "FEATURE",
envs: []getenver{
mockEnver{"FEATURE": "disabled"},
},
expected: false,
},
}

for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
require.EqualValues(t, tc.expected, tc.feature.isEnabled(tc.envvar, tc.envs...))
})
}
}

type mockEnver map[string]string

func (m mockEnver) Getenv(k string) string {
return m[k]
}

func ptr[T any](x T) *T {
return &x
}
29 changes: 29 additions & 0 deletions internal/edits/api.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/**
# Copyright 2024 NVIDIA CORPORATION
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package edits

import (
"tags.cncf.io/container-device-interface/pkg/cdi"

"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)

type Interface interface {
EditsFromDiscoverer(discover.Discover) (*cdi.ContainerEdits, error)
SpecModifierFromDiscoverer(discover.Discover) (oci.SpecModifier, error)
}
42 changes: 40 additions & 2 deletions internal/edits/device.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
package edits

import (
"os"

"golang.org/x/sys/unix"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"

Expand All @@ -26,15 +29,23 @@ import (
type device discover.Device

// toEdits converts a discovered device to CDI Container Edits.
func (d device) toEdits() (*cdi.ContainerEdits, error) {
func (d device) toEdits(allowAdditionalGIDs bool) (*cdi.ContainerEdits, error) {
deviceNode, err := d.toSpec()
if err != nil {
return nil, err
}

var additionalGIDs []uint32
if allowAdditionalGIDs {
if requiredGID := d.getRequiredGID(); requiredGID != 0 {
additionalGIDs = append(additionalGIDs, requiredGID)
}
}

e := cdi.ContainerEdits{
ContainerEdits: &specs.ContainerEdits{
DeviceNodes: []*specs.DeviceNode{deviceNode},
DeviceNodes: []*specs.DeviceNode{deviceNode},
AdditionalGIDs: additionalGIDs,
},
}
return &e, nil
Expand All @@ -52,10 +63,37 @@ func (d device) toSpec() (*specs.DeviceNode, error) {
if hostPath == d.Path {
hostPath = ""
}

s := specs.DeviceNode{
HostPath: hostPath,
Path: d.Path,
}

return &s, nil
}

// getRequiredGID returns the group id of the device if the device is not world read/writable.
// If the information cannot be extracted or an error occurs, 0 is returned.
func (d device) getRequiredGID() uint32 {
path := d.HostPath
if path == "" {
path = d.Path
}
if path == "" {
return 0
}

var stat unix.Stat_t
if err := unix.Lstat(path, &stat); err != nil {
return 0
}
// This is only supported for char devices
if stat.Mode&unix.S_IFMT != unix.S_IFCHR {
return 0
}

if permissionsForOther := os.FileMode(stat.Mode).Perm(); permissionsForOther&06 == 0 {
return stat.Gid
}
return 0
}
Loading
Loading