Skip to content

Commit 302a14b

Browse files
committed
gpu: copy nfdhook functionality to gpu-plugin
In order to support NFD v0.14+ and extended resources, write the node labels into a file and store it in NFD's features.d directory. The old functionality still exists in the initcontainer, but if it is used, NFD has to be configured to allow binary hooks. Signed-off-by: Tuomas Katila <[email protected]>
1 parent a70651f commit 302a14b

21 files changed

+502
-93
lines changed

cmd/gpu_nfdhook/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ Table of Contents
1111

1212
## Introduction
1313

14+
***NOTE:*** NFD's binary hook support will be turned off by default in the 0.14 release. To keep using the hook after that, NFD's default configuration has to be modified to allow binary hooks. Because of this change, the GPU binary hook's functionality has also been copied to the GPU plugin.
15+
1416
This is the [Node Feature Discovery](https://github.com/kubernetes-sigs/node-feature-discovery)
1517
binary hook implementation for the Intel GPUs. The intel-gpu-initcontainer (which
1618
is built with the other images) can be used as part of the gpu-plugin deployment

cmd/gpu_nfdhook/main.go

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,7 @@
1515
package main
1616

1717
import (
18-
"os"
19-
20-
"k8s.io/klog/v2"
18+
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/labeler"
2119
)
2220

2321
const (
@@ -27,13 +25,5 @@ const (
2725
)
2826

2927
func main() {
30-
l := newLabeler(sysfsDRMDirectory, debugfsDRIDirectory)
31-
32-
err := l.createLabels()
33-
if err != nil {
34-
klog.Errorf("%+v", err)
35-
os.Exit(1)
36-
}
37-
38-
l.printLabels()
28+
labeler.CreateAndPrintLabels(sysfsDRMDirectory, debugfsDRIDirectory)
3929
}

cmd/gpu_plugin/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ backend libraries can offload compute operations to GPU.
5252
| -resource-manager | - | disabled | Enable fractional resource management, [see also dependencies](#fractional-resources) |
5353
| -shared-dev-num | int | 1 | Number of containers that can share the same GPU device |
5454
| -allocation-policy | string | none | 3 possible values: balanced, packed, none. For shared-dev-num > 1: _balanced_ mode spreads workloads among GPU devices, _packed_ mode fills one GPU fully before moving to next, and _none_ selects first available device from kubelet. Default is _none_. Allocation policy does not have an effect when resource manager is enabled. |
55+
| -disable-resource-label-export | bool | false | Disable resource label export to NFD's features.d directory. |
56+
| -nfd-resource-filename | string | intel-gpu-resources.txt | Name for the NFD feature file. |
5557

5658
The plugin also accepts a number of other arguments (common to all plugins) related to logging.
5759
Please use the -h option to see the complete list of logging related options.

cmd/gpu_plugin/gpu_plugin.go

Lines changed: 64 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2017-2022 Intel Corporation. All Rights Reserved.
1+
// Copyright 2017-2023 Intel Corporation. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -31,17 +31,20 @@ import (
3131
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
3232

3333
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/gpu_plugin/rm"
34+
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/labeler"
3435
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/pluginutils"
3536
dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin"
3637
)
3738

3839
const (
39-
sysfsDrmDirectory = "/sys/class/drm"
40-
devfsDriDirectory = "/dev/dri"
41-
gpuDeviceRE = `^card[0-9]+$`
42-
controlDeviceRE = `^controlD[0-9]+$`
43-
pciAddressRE = "^[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\\.[0-9a-f]{1}$"
44-
vendorString = "0x8086"
40+
sysfsDrmDirectory = "/sys/class/drm"
41+
debugfsDRIDirectory = "/sys/kernel/debug/dri"
42+
devfsDriDirectory = "/dev/dri"
43+
nfdFeatureDir = "/etc/kubernetes/node-feature-discovery/features.d"
44+
gpuDeviceRE = `^card[0-9]+$`
45+
controlDeviceRE = `^controlD[0-9]+$`
46+
pciAddressRE = "^[0-9a-f]{4}:[0-9a-f]{2}:[0-9a-f]{2}\\.[0-9a-f]{1}$"
47+
vendorString = "0x8086"
4548

4649
// Device plugin settings.
4750
namespace = "gpu.intel.com"
@@ -53,13 +56,18 @@ const (
5356

5457
// Period of device scans.
5558
scanPeriod = 5 * time.Second
59+
60+
// Labeler's max update interval, 5min.
61+
labelerMaxInterval = 5 * 60 * time.Second
5662
)
5763

5864
type cliOptions struct {
59-
preferredAllocationPolicy string
60-
sharedDevNum int
61-
enableMonitoring bool
62-
resourceManagement bool
65+
preferredAllocationPolicy string
66+
nfdResourceFileName string
67+
sharedDevNum int
68+
enableMonitoring bool
69+
resourceManagement bool
70+
disableResourceLabelExport bool
6371
}
6472

6573
type preferredAllocationPolicyFunc func(*pluginapi.ContainerPreferredAllocationRequest) []string
@@ -242,8 +250,9 @@ type devicePlugin struct {
242250
controlDeviceReg *regexp.Regexp
243251
pciAddressReg *regexp.Regexp
244252

245-
scanTicker *time.Ticker
246-
scanDone chan bool
253+
scanTicker *time.Ticker
254+
scanDone chan bool
255+
scanResources chan bool
247256

248257
resMan rm.ResourceManager
249258

@@ -255,21 +264,24 @@ type devicePlugin struct {
255264
policy preferredAllocationPolicyFunc
256265
options cliOptions
257266

258-
bypathFound bool
267+
bypathFound bool
268+
exportResourceLabels bool
259269
}
260270

261271
func newDevicePlugin(sysfsDir, devfsDir string, options cliOptions) *devicePlugin {
262272
dp := &devicePlugin{
263-
sysfsDir: sysfsDir,
264-
devfsDir: devfsDir,
265-
bypathDir: path.Join(devfsDir, "/by-path"),
266-
options: options,
267-
gpuDeviceReg: regexp.MustCompile(gpuDeviceRE),
268-
controlDeviceReg: regexp.MustCompile(controlDeviceRE),
269-
pciAddressReg: regexp.MustCompile(pciAddressRE),
270-
scanTicker: time.NewTicker(scanPeriod),
271-
scanDone: make(chan bool, 1), // buffered as we may send to it before Scan starts receiving from it
272-
bypathFound: true,
273+
sysfsDir: sysfsDir,
274+
devfsDir: devfsDir,
275+
bypathDir: path.Join(devfsDir, "/by-path"),
276+
options: options,
277+
gpuDeviceReg: regexp.MustCompile(gpuDeviceRE),
278+
controlDeviceReg: regexp.MustCompile(controlDeviceRE),
279+
pciAddressReg: regexp.MustCompile(pciAddressRE),
280+
scanTicker: time.NewTicker(scanPeriod),
281+
scanDone: make(chan bool, 1), // buffered as we may send to it before Scan starts receiving from it
282+
bypathFound: true,
283+
scanResources: make(chan bool, 1),
284+
exportResourceLabels: !options.disableResourceLabelExport,
273285
}
274286

275287
if options.resourceManagement {
@@ -347,17 +359,26 @@ func (dp *devicePlugin) Scan(notifier dpapi.Notifier) error {
347359
klog.Warning("Failed to scan: ", err)
348360
}
349361

362+
countChanged := false
363+
350364
for name, prev := range previousCount {
351365
count := devTree.DeviceTypeCount(name)
352366
if count != prev {
353367
klog.V(1).Infof("GPU scan update: %d->%d '%s' resources found", prev, count, name)
354368

355369
previousCount[name] = count
370+
371+
countChanged = true
356372
}
357373
}
358374

359375
notifier.Notify(devTree)
360376

377+
// Trigger resource scan if it's enabled.
378+
if dp.exportResourceLabels && countChanged {
379+
dp.scanResources <- true
380+
}
381+
361382
select {
362383
case <-dp.scanDone:
363384
return nil
@@ -494,6 +515,8 @@ func main() {
494515
flag.BoolVar(&opts.resourceManagement, "resource-manager", false, "fractional GPU resource management")
495516
flag.IntVar(&opts.sharedDevNum, "shared-dev-num", 1, "number of containers sharing the same GPU device")
496517
flag.StringVar(&opts.preferredAllocationPolicy, "allocation-policy", "none", "modes of allocating GPU devices: balanced, packed and none")
518+
flag.StringVar(&opts.nfdResourceFileName, "nfd-resource-filename", "intel-gpu-resources.txt", "filename for writing labels under NFD's features.d directory")
519+
flag.BoolVar(&opts.disableResourceLabelExport, "disable-resource-label-export", false, "export labels from node's GPUs and store them for NFD")
497520
flag.Parse()
498521

499522
if opts.sharedDevNum < 1 {
@@ -512,9 +535,26 @@ func main() {
512535
os.Exit(1)
513536
}
514537

538+
if strings.Contains(opts.nfdResourceFileName, "/") {
539+
klog.Errorf("NFD resource filename shouldn't have any slashes (/): %s", opts.nfdResourceFileName)
540+
os.Exit(1)
541+
}
542+
515543
klog.V(1).Infof("GPU device plugin started with %s preferred allocation policy", opts.preferredAllocationPolicy)
516544

517545
plugin := newDevicePlugin(prefix+sysfsDrmDirectory, prefix+devfsDriDirectory, opts)
546+
547+
if plugin.exportResourceLabels {
548+
// Start labeler to export labels file for NFD.
549+
nfdFeatureFile := path.Join(nfdFeatureDir, path.Base(opts.nfdResourceFileName))
550+
551+
klog.V(2).Infof("NFD feature file location: %s", nfdFeatureFile)
552+
553+
// Labeler catches OS signals and calls os.Exit() after receiving any.
554+
go labeler.Run(prefix+sysfsDrmDirectory, prefix+debugfsDRIDirectory, nfdFeatureFile,
555+
labelerMaxInterval, plugin.scanResources)
556+
}
557+
518558
manager := dpapi.NewManager(namespace, plugin)
519559
manager.Run()
520560
}

cmd/gpu_plugin/gpu_plugin_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2017-2021 Intel Corporation. All Rights Reserved.
1+
// Copyright 2017-2023 Intel Corporation. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

cmd/gpu_plugin/render-device.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#!/bin/sh
22
#
3-
# Copyright 2021 Intel Corporation.
3+
# Copyright 2021-2023 Intel Corporation.
44
#
55
# SPDX-License-Identifier: Apache-2.0
66
#

cmd/gpu_plugin/rm/gpu_plugin_resource_manager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2021 Intel Corporation. All Rights Reserved.
1+
// Copyright 2021-2023 Intel Corporation. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

cmd/gpu_plugin/rm/gpu_plugin_resource_manager_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2021 Intel Corporation. All Rights Reserved.
1+
// Copyright 2021-2023 Intel Corporation. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.

cmd/gpu_nfdhook/labeler.go renamed to cmd/internal/labeler/labeler.go

Lines changed: 102 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2020-2021 Intel Corporation. All Rights Reserved.
1+
// Copyright 2020-2023 Intel Corporation. All Rights Reserved.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -12,18 +12,22 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
package main
15+
package labeler
1616

1717
import (
1818
"bufio"
1919
"fmt"
2020
"os"
21+
"os/signal"
2122
"path"
2223
"path/filepath"
24+
"reflect"
2325
"regexp"
2426
"sort"
2527
"strconv"
2628
"strings"
29+
"syscall"
30+
"time"
2731

2832
"github.com/intel/intel-device-plugins-for-kubernetes/cmd/internal/pluginutils"
2933
"github.com/pkg/errors"
@@ -58,6 +62,7 @@ type labeler struct {
5862

5963
sysfsDRMDir string
6064
debugfsDRIDir string
65+
labelsChanged bool
6166
}
6267

6368
func newLabeler(sysfsDRMDir, debugfsDRIDir string) *labeler {
@@ -67,6 +72,7 @@ func newLabeler(sysfsDRMDir, debugfsDRIDir string) *labeler {
6772
gpuDeviceReg: regexp.MustCompile(gpuDeviceRE),
6873
controlDeviceReg: regexp.MustCompile(controlDeviceRE),
6974
labels: labelMap{},
75+
labelsChanged: true,
7076
}
7177
}
7278

@@ -345,6 +351,10 @@ func (l *labeler) createPCIGroupLabel(gpuNumList []string) string {
345351

346352
// createLabels is the main function of plugin labeler, it creates label-value pairs for the gpus.
347353
func (l *labeler) createLabels() error {
354+
prevLabels := l.labels
355+
356+
l.labels = labelMap{}
357+
348358
gpuNameList, err := l.scan()
349359
if err != nil {
350360
return err
@@ -431,6 +441,8 @@ func (l *labeler) createLabels() error {
431441
}
432442
}
433443

444+
l.labelsChanged = !reflect.DeepEqual(prevLabels, l.labels)
445+
434446
return nil
435447
}
436448

@@ -455,8 +467,95 @@ func createNumaNodeMappingLabel(mapping map[int][]string) string {
455467
return strings.Join(parts, "_")
456468
}
457469

458-
func (l *labeler) printLabels() {
470+
func (l *labeler) printLabelsToFile(labelFile string) error {
471+
f, err := os.OpenFile(labelFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
472+
if err != nil {
473+
return fmt.Errorf("failed to open file (%s): %w", labelFile, err)
474+
}
475+
476+
defer f.Close()
477+
478+
for key, val := range l.labels {
479+
if _, err := f.WriteString(key + "=" + val + "\n"); err != nil {
480+
return fmt.Errorf("failed to write label (%s=%s) to file: %w", key, val, err)
481+
}
482+
}
483+
484+
return nil
485+
}
486+
487+
func CreateAndPrintLabels(sysfsDRMDir, debugfsDRIDir string) {
488+
l := newLabeler(sysfsDRMDir, debugfsDRIDir)
489+
490+
if err := l.createLabels(); err != nil {
491+
klog.Warningf("failed to create labels: %+v", err)
492+
493+
return
494+
}
495+
459496
for key, val := range l.labels {
460497
fmt.Println(key + "=" + val)
461498
}
462499
}
500+
501+
// Gathers node's GPU labels on channel trigger or timeout, and write them to a file.
502+
// The created label file is deleted on exit (process dying).
503+
func Run(sysfsDrmDir, debugfsDRIDir, nfdFeatureFile string, updateInterval time.Duration, scanResources chan bool) {
504+
l := newLabeler(sysfsDrmDir, debugfsDRIDir)
505+
506+
interruptChan := make(chan os.Signal, 1)
507+
signal.Notify(interruptChan, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP, syscall.SIGQUIT)
508+
509+
klog.V(1).Info("Starting GPU labeler")
510+
511+
running := true
512+
513+
for running {
514+
timeout := time.After(updateInterval)
515+
516+
select {
517+
case <-timeout:
518+
case <-scanResources:
519+
case interrupt := <-interruptChan:
520+
klog.V(2).Infof("Interrupt %d received", interrupt)
521+
522+
running = false
523+
524+
continue
525+
}
526+
527+
klog.V(1).Info("Ext resources scanning")
528+
529+
err := l.createLabels()
530+
if err != nil {
531+
klog.Warningf("label creation failed: %+v", err)
532+
533+
continue
534+
}
535+
536+
if l.labelsChanged {
537+
klog.V(1).Info("Writing labels")
538+
539+
if err := l.printLabelsToFile(nfdFeatureFile); err != nil {
540+
klog.Warningf("failed to write labels to file: %+v", err)
541+
542+
// Reset labels so that next time the labeler runs the writing is retried.
543+
l.labels = labelMap{}
544+
}
545+
}
546+
}
547+
548+
signal.Stop(interruptChan)
549+
550+
klog.V(2).Info("Removing label file")
551+
552+
err := os.Remove(nfdFeatureFile)
553+
if err != nil {
554+
klog.Errorf("Failed to cleanup label file: %+v", err)
555+
}
556+
557+
klog.V(1).Info("Stopping GPU labeler")
558+
559+
// Close the whole application
560+
os.Exit(0)
561+
}

0 commit comments

Comments
 (0)