-
Notifications
You must be signed in to change notification settings - Fork 351
Default to cdi #910
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Default to cdi #910
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -125,8 +125,8 @@ func TestGoodInput(t *testing.T) { | |||||
// Check config.json for NVIDIA prestart hook | ||||||
spec, err = cfg.getRuntimeSpec() | ||||||
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json") | ||||||
require.NotEmpty(t, spec.Hooks, "there should be hooks in config.json") | ||||||
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json") | ||||||
require.Empty(t, spec.Hooks, "there should be hooks in config.json") | ||||||
require.Equal(t, 0, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's invert the world There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
|
||||||
// NVIDIA prestart hook already present in config file | ||||||
|
@@ -171,8 +171,8 @@ func TestDuplicateHook(t *testing.T) { | |||||
// Check config.json for NVIDIA prestart hook | ||||||
spec, err = cfg.getRuntimeSpec() | ||||||
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json") | ||||||
require.NotEmpty(t, spec.Hooks, "there should be hooks in config.json") | ||||||
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json") | ||||||
require.Empty(t, spec.Hooks, "there should be no hooks in config.json") | ||||||
require.Equal(t, 0, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json") | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
|
||||||
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for | ||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,11 +31,22 @@ import ( | |
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" | ||
) | ||
|
||
const ( | ||
automaticDeviceVendor = "runtime.nvidia.com" | ||
automaticDeviceClass = "gpu" | ||
automaticDeviceKind = automaticDeviceVendor + "/" + automaticDeviceClass | ||
) | ||
|
||
// NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the | ||
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is | ||
// used to select the devices to include. | ||
func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) { | ||
devices, err := getDevicesFromSpec(logger, ociSpec, cfg) | ||
func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, isJitCDI bool) (oci.SpecModifier, error) { | ||
defaultKind := cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind | ||
if isJitCDI { | ||
defaultKind = automaticDeviceKind | ||
} | ||
|
||
devices, err := getDevicesFromSpec(logger, ociSpec, cfg, defaultKind) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err) | ||
} | ||
|
@@ -65,7 +76,7 @@ func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spe | |
) | ||
} | ||
|
||
func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.Config) ([]string, error) { | ||
func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.Config, defaultKind string) ([]string, error) { | ||
rawSpec, err := ociSpec.Load() | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to load OCI spec: %v", err) | ||
|
@@ -83,26 +94,16 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C | |
if err != nil { | ||
return nil, err | ||
} | ||
if cfg.AcceptDeviceListAsVolumeMounts { | ||
mountDevices := container.CDIDevicesFromMounts() | ||
if len(mountDevices) > 0 { | ||
return mountDevices, nil | ||
} | ||
} | ||
|
||
var devices []string | ||
seen := make(map[string]bool) | ||
for _, name := range container.VisibleDevicesFromEnvVar() { | ||
if !parser.IsQualifiedName(name) { | ||
name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name) | ||
} | ||
if seen[name] { | ||
logger.Debugf("Ignoring duplicate device %q", name) | ||
continue | ||
if cfg.AcceptDeviceListAsVolumeMounts { | ||
devices = normalizeDeviceList(logger, defaultKind, append(container.DevicesFromMounts(), container.CDIDevicesFromMounts()...)...) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Before this change I notice that we were only calling There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ++ |
||
if len(devices) > 0 { | ||
return devices, nil | ||
} | ||
devices = append(devices, name) | ||
} | ||
|
||
devices = normalizeDeviceList(logger, defaultKind, container.VisibleDevicesFromEnvVar()...) | ||
if len(devices) == 0 { | ||
return nil, nil | ||
} | ||
|
@@ -116,6 +117,24 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C | |
return nil, nil | ||
} | ||
|
||
func normalizeDeviceList(logger logger.Interface, defaultKind string, devices ...string) []string { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe add a code comment showing an example of what this function is doing (supposed to do). |
||
seen := make(map[string]bool) | ||
var normalized []string | ||
for _, name := range devices { | ||
if !parser.IsQualifiedName(name) { | ||
name = fmt.Sprintf("%s=%s", defaultKind, name) | ||
} | ||
if seen[name] { | ||
logger.Debugf("Ignoring duplicate device %q", name) | ||
continue | ||
} | ||
normalized = append(normalized, name) | ||
seen[name] = true | ||
} | ||
|
||
return normalized | ||
} | ||
|
||
// getAnnotationDevices returns a list of devices specified in the annotations. | ||
// Keys starting with the specified prefixes are considered and expected to contain a comma-separated list of | ||
// fully-qualified CDI devices names. If any device name is not fully-quality an error is returned. | ||
|
@@ -156,7 +175,7 @@ func filterAutomaticDevices(devices []string) []string { | |
var automatic []string | ||
for _, device := range devices { | ||
vendor, class, _ := parser.ParseDevice(device) | ||
if vendor == "runtime.nvidia.com" && class == "gpu" { | ||
if vendor == automaticDeviceVendor && class == automaticDeviceClass { | ||
automatic = append(automatic, device) | ||
} | ||
} | ||
|
@@ -165,6 +184,8 @@ func filterAutomaticDevices(devices []string) []string { | |
|
||
func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, devices []string) (oci.SpecModifier, error) { | ||
logger.Debugf("Generating in-memory CDI specs for devices %v", devices) | ||
// TODO: We should try to load the kernel modules and create the device nodes here. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We can edit this TODO now, after #976 (comment) |
||
// Failures should raise a warning and not error out. | ||
spec, err := generateAutomaticCDISpec(logger, cfg, devices) | ||
if err != nil { | ||
return nil, fmt.Errorf("failed to generate CDI spec: %w", err) | ||
|
This file was deleted.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.