Skip to content

Commit

Permalink
Nvidia settings API for container runtime
Browse files Browse the repository at this point in the history
Signed-off-by: Monirul Islam <[email protected]>
  • Loading branch information
fedora Cloud User authored and monirul committed May 28, 2024
1 parent c4b17dd commit 2adc490
Show file tree
Hide file tree
Showing 13 changed files with 97 additions and 11 deletions.
1 change: 1 addition & 0 deletions Release.toml
Original file line number Diff line number Diff line change
Expand Up @@ -314,4 +314,5 @@ version = "1.21.0"
]
"(1.20.0, 1.21.0)" = [
"migrate_v1.21.0_pluto-remove-generators-v0-1-0.lz4",
"migrate_v1.21.0_container-runtime-nvidia-k8s.lz4"
]
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
[required-extensions]
kubernetes = "v1"

+++
accept-nvidia-visible-devices-as-volume-mounts = {{settings.kubernetes.nvidia.container-runtime.visible-devices-as-volume-mounts}}
accept-nvidia-visible-devices-envvar-when-unprivileged = {{settings.kubernetes.nvidia.container-runtime.visible-devices-envvar-when-unprivileged}}

[nvidia-container-cli]
root = "/"
path = "/usr/bin/nvidia-container-cli"
environment = []
ldconfig = "@/sbin/ldconfig"

This file was deleted.

Original file line number Diff line number Diff line change
@@ -1 +1 @@
C /etc/nvidia-container-runtime/config.toml - - - - /usr/share/factory/nvidia-container-runtime/nvidia-container-toolkit-config-k8s.toml
d /etc/nvidia-container-runtime - - - - -
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ License: Apache-2.0
URL: https://%{goimport}

Source0: https://%{goimport}/archive/v%{gover}/nvidia-container-toolkit-%{gover}.tar.gz
Source1: nvidia-container-toolkit-config-k8s.toml
Source1: nvidia-container-toolkit-config-k8s
Source2: nvidia-container-toolkit-config-ecs.toml
Source3: nvidia-oci-hooks-json
Source4: nvidia-gpu-devices.rules
Expand Down Expand Up @@ -82,5 +82,5 @@ ln -s shimpei %{buildroot}%{_cross_bindir}/nvidia-oci
%{_cross_tmpfilesdir}/nvidia-container-toolkit-ecs.conf

%files k8s
%{_cross_factorydir}/nvidia-container-runtime/nvidia-container-toolkit-config-k8s.toml
%{_cross_factorydir}/nvidia-container-runtime/nvidia-container-toolkit-config-k8s
%{_cross_tmpfilesdir}/nvidia-container-toolkit-k8s.conf
8 changes: 8 additions & 0 deletions sources/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions sources/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ members = [
"api/migration/migrations/v1.20.0/aws-control-container-v0-7-12",
"api/migration/migrations/v1.20.0/public-control-container-v0-7-12",
"api/migration/migrations/v1.21.0/pluto-remove-generators-v0-1-0",
"api/migration/migrations/v1.21.0/container-runtime-nvidia-k8s",

"bloodhound",

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[package]
name = "container-runtime-nvidia-k8s"
version = "0.1.0"
edition = "2021"
authors = ["Monirul Islam <[email protected]>"]
license = "Apache-2.0 OR MIT"
publish = false
# Don't rebuild crate just because of changes to README.
exclude = ["README.md"]

[dependencies]
migration-helpers = { path = "../../../migration-helpers", version = "0.1.0"}

[build-dependencies]
bottlerocket-variant = { version = "0.1", path = "../../../../../bottlerocket-variant" }
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
use bottlerocket_variant::Variant;

/// Build-script entry point: reads the variant being built from the environment and emits the
/// corresponding cfg values for this crate.
fn main() {
    // A missing or unreadable variant is a build environment error, so panicking here is intended.
    Variant::from_env().unwrap().emit_cfgs();
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
use migration_helpers::common_migrations::{AddPrefixesMigration, NoOpMigration};
use migration_helpers::{migrate, Result};
use std::process;

/// We added new settings for configuring the NVIDIA container runtime
/// (`/etc/nvidia-container-runtime/config.toml`), only for NVIDIA k8s variants.
///
/// Alongside `settings.kubernetes.nvidia.container-runtime`, the same change adds a
/// `services.nvidia-container-toolkit` entry and a `configuration-files.nvidia-container-toolkit`
/// entry to the shared defaults. All three prefixes are registered here so that none of them is
/// left behind in the datastore after a downgrade; the older image does not ship the template
/// the configuration-file entry points at.
///
/// Builds where the `variant_family = "aws-k8s"` and `variant_flavor = "nvidia"` cfgs are not
/// both set run a no-op migration.
fn run() -> Result<()> {
    if cfg!(variant_family = "aws-k8s") && cfg!(variant_flavor = "nvidia") {
        migrate(AddPrefixesMigration(vec![
            "settings.kubernetes.nvidia.container-runtime",
            // NOTE(review): these two mirror the `[services.*]` and `[configuration-files.*]`
            // tables in nvidia-k8s-container-toolkit.toml; keep them in sync with that file.
            "services.nvidia-container-toolkit",
            "configuration-files.nvidia-container-toolkit",
        ]))
    } else {
        migrate(NoOpMigration)
    }
}

// `main` deliberately does not return a Result: that would print the Debug representation of
// the error, while Snafu gives us nice Display representations. Instead we wrap "main" (run)
// and print any error ourselves before exiting with a non-zero status.
// https://github.com/shepmaster/snafu/issues/110
fn main() {
    run().unwrap_or_else(|err| {
        eprintln!("{}", err);
        process::exit(1);
    });
}
14 changes: 14 additions & 0 deletions sources/models/shared-defaults/nvidia-k8s-container-toolkit.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[settings.kubernetes.nvidia.container-runtime]
visible-devices-as-volume-mounts = false
visible-devices-envvar-when-unprivileged = true

[metadata.settings.kubernetes.nvidia.container-runtime]
affected-services = ["nvidia-container-toolkit"]

[services.nvidia-container-toolkit]
configuration-files = ["nvidia-container-toolkit"]
restart-commands = []

[configuration-files.nvidia-container-toolkit]
path = "/etc/nvidia-container-runtime/config.toml"
template-path = "/usr/share/factory/nvidia-container-runtime/nvidia-container-toolkit-config-k8s"
12 changes: 12 additions & 0 deletions sources/models/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,7 @@ struct KubernetesSettings {
hostname_override: ValidLinuxHostname,
// Generated in `k8s-1.25+` variants only
seccomp_default: bool,
nvidia: K8sNvidiaSettings,
}

// ECS settings.
Expand Down Expand Up @@ -572,3 +573,14 @@ struct Report {
name: String,
description: String,
}

// NVIDIA-specific Kubernetes settings; this is the type of the `nvidia` field in
// `KubernetesSettings`.
#[model]
struct K8sNvidiaSettings {
    // Settings rendered into the NVIDIA container runtime configuration.
    container_runtime: K8sContainerRuntimeSettings,
}

// NVIDIA container runtime settings for Kubernetes variants.
// NOTE(review): the defaults and the config template refer to these fields by kebab-case keys
// (e.g. `visible-devices-as-volume-mounts`); presumably `#[model]` handles the renaming — confirm.
#[model]
struct K8sContainerRuntimeSettings {
    // Feeds `accept-nvidia-visible-devices-as-volume-mounts` in the runtime config template.
    visible_devices_as_volume_mounts: bool,
    // Feeds `accept-nvidia-visible-devices-envvar-when-unprivileged` in the runtime config
    // template.
    visible_devices_envvar_when_unprivileged: bool,
}

0 comments on commit 2adc490

Please sign in to comment.