Skip to content

Commit e892112

Browse files
committed
gpu: mount by-path directory
oneCCL requires the /dev/dri/by-path folder to be available to create a mapping between GPUs. Signed-off-by: Tuomas Katila <[email protected]>
1 parent 85b6795 commit e892112

File tree

3 files changed

+219
-7
lines changed

3 files changed

+219
-7
lines changed

cmd/gpu_plugin/gpu_plugin.go

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,12 @@ import (
3535
)
3636

3737
const (
38-
sysfsDrmDirectory = "/sys/class/drm"
39-
devfsDriDirectory = "/dev/dri"
40-
gpuDeviceRE = `^card[0-9]+$`
41-
controlDeviceRE = `^controlD[0-9]+$`
42-
vendorString = "0x8086"
38+
sysfsDrmDirectory = "/sys/class/drm"
39+
devfsDriDirectory = "/dev/dri"
40+
devfsBypathDirectory = "/dev/dri/by-path"
41+
gpuDeviceRE = `^card[0-9]+$`
42+
controlDeviceRE = `^controlD[0-9]+$`
43+
vendorString = "0x8086"
4344

4445
// Device plugin settings.
4546
namespace = "gpu.intel.com"
@@ -338,15 +339,18 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) {
338339
}
339340

340341
if len(nodes) > 0 {
341-
deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)
342+
mounts := pluginutils.BypathMountsForPci(
343+
pluginutils.ReadPciAddressForCard(path.Join(sysfsDrmDirectory, f.Name())), devfsBypathDirectory)
344+
345+
deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, mounts, nil, nil)
342346

343347
for i := 0; i < dp.options.sharedDevNum; i++ {
344348
devID := fmt.Sprintf("%s-%d", f.Name(), i)
345349
// Currently only one device type (i915) is supported.
346350
// TODO: check model ID to differentiate device models.
347351
devTree.AddDevice(deviceType, devID, deviceInfo)
348352

349-
rmDevInfos[devID] = rm.NewDeviceInfo(nodes, nil, nil)
353+
rmDevInfos[devID] = rm.NewDeviceInfo(nodes, mounts, nil)
350354
}
351355
}
352356
}

cmd/internal/pluginutils/bypath.go

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
// Copyright 2023 Intel Corporation. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package pluginutils
16+
17+
import (
18+
"os"
19+
"path"
20+
"strings"
21+
22+
pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1"
23+
)
24+
25+
// Returns a slice of by-path Mounts for a pci device with pciAddress.
26+
// by-path files are searched from the given bypathDir.
27+
// In the by-path dir, any files that start with "pci-<pci addr>" will be added to mounts.
28+
func BypathMountsForPci(pciAddress, bypathDir string) []pluginapi.Mount {
29+
var mounts []pluginapi.Mount
30+
31+
if pciAddress == "" {
32+
return nil
33+
}
34+
35+
files, err := os.ReadDir(bypathDir)
36+
if err != nil {
37+
return nil
38+
}
39+
40+
linkPrefix := "pci-" + pciAddress
41+
42+
for _, f := range files {
43+
if strings.HasPrefix(f.Name(), linkPrefix) {
44+
absPath := path.Join(bypathDir, f.Name())
45+
mounts = append(mounts, pluginapi.Mount{
46+
ContainerPath: absPath,
47+
HostPath: absPath,
48+
ReadOnly: true,
49+
})
50+
}
51+
}
52+
53+
return mounts
54+
}
55+
56+
// ReadPciAddressForCard extracts the PCI address of a drm card from the target
// of the symbolic link at cardPath (e.g. /sys/class/drm/cardX).
//
// The link target is expected to end in "<pci address>/drm/<card>", as in
//
//	../../devices/pci0000:00/0000:00:02.0/drm/card0
//	                         ^^^^^^^^^^^^
//
// and the path element immediately before "drm" is returned. An empty string is
// returned when the link cannot be read, when the target has fewer than three
// "/"-separated elements, or when the second-to-last element is not "drm".
func ReadPciAddressForCard(cardPath string) string {
	target, err := os.Readlink(cardPath)
	if err != nil {
		return ""
	}

	segments := strings.Split(target, "/")

	// Need at least "<addr>/drm/<card>", with "drm" directly above the card node.
	if n := len(segments); n >= 3 && segments[n-2] == "drm" {
		return segments[n-3]
	}

	return ""
}
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
// Copyright 2023 Intel Corporation. All Rights Reserved.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package pluginutils
16+
17+
import (
18+
"os"
19+
"path"
20+
"path/filepath"
21+
"testing"
22+
23+
"k8s.io/utils/strings/slices"
24+
)
25+
26+
// createTestFiles builds a fake sysfs/devfs layout under root for the by-path tests.
//
// When linkFile is non-empty, a placeholder file is written at <root>/sys/<linkFile>
// and <root>/sys/class/drm/card0 is created as a symlink pointing to it. When
// bypathFiles is non-empty, each name is created as a regular file in
// <root>/by-path.
//
// It returns the card0 path and the by-path directory path, in that order. Both
// are returned even when nothing was created at them. Any filesystem error
// fails the test immediately.
func createTestFiles(t *testing.T, root, linkFile string, bypathFiles []string) (string, string) {
	// Attribute failures to the calling test line instead of this helper.
	t.Helper()

	drmPath := path.Join(root, "sys/class/drm/card0")
	devPath := path.Join(root, "sys", linkFile)
	byPath := path.Join(root, "by-path")

	if linkFile != "" {
		if err := os.MkdirAll(filepath.Dir(devPath), os.ModePerm); err != nil {
			t.Fatal("Couldn't create test dev dir", err)
		}

		if err := os.MkdirAll(filepath.Dir(drmPath), os.ModePerm); err != nil {
			t.Fatal("Couldn't create test drm dir", err)
		}

		if err := os.WriteFile(devPath, []byte{0}, os.ModePerm); err != nil {
			t.Fatal("Couldn't create card file", err)
		}

		// The symlink target is what ReadPciAddressForCard later parses.
		if err := os.Symlink(devPath, drmPath); err != nil {
			t.Fatal("Couldn't create symlink between pci path and sysfs drm path", err)
		}
	}

	if len(bypathFiles) > 0 {
		if err := os.MkdirAll(byPath, os.ModePerm); err != nil {
			t.Fatal("Mkdir failed:", byPath, err)
		}

		for _, f := range bypathFiles {
			if err := os.WriteFile(path.Join(byPath, f), []byte{1}, os.ModePerm); err != nil {
				t.Fatal("WriteFile failed:", path.Join(byPath, f), err)
			}
		}
	}

	return drmPath, byPath
}
63+
64+
func TestBypath(t *testing.T) {
65+
type testData struct {
66+
linkpath string
67+
bypathFiles []string
68+
mountCount int
69+
}
70+
71+
tds := []testData{
72+
{
73+
"00.10.2/00.334.302/0.0.1.00/0-1-2-3-3342/drm/card0",
74+
[]string{"pci-0-1-2-3-3342-card", "pci-0-1-2-3-3342-render"},
75+
2,
76+
},
77+
{
78+
"00.10.2/00.334.302/0.0.1.00/0-1-2-3-4343/drm/card0",
79+
[]string{"pci-0-1-2-3-4444-card", "pci-0-1-2-3-4444-render"},
80+
0,
81+
},
82+
{
83+
"00.10.2/00.334.302/0.0.1.00/0-1-2-3-3342/drm",
84+
[]string{"pci-0-1-2-3-4444-card", "pci-0-1-2-3-4444-render"},
85+
0,
86+
},
87+
{
88+
"",
89+
[]string{"pci-0-1-2-3-3342-card", "pci-0-1-2-3-3342-render"},
90+
0,
91+
},
92+
{
93+
"00.10.2/00.334.302/0.0.1.00/0-1-2-3-3342/drm/card0",
94+
[]string{},
95+
0,
96+
},
97+
}
98+
99+
for _, td := range tds {
100+
root, err := os.MkdirTemp("", "test_bypath_mounting")
101+
if err != nil {
102+
t.Fatalf("can't create temporary directory: %+v", err)
103+
}
104+
// dirs/files need to be removed for the next test
105+
defer os.RemoveAll(root)
106+
107+
drmPath, byPath := createTestFiles(t, root, td.linkpath, td.bypathFiles)
108+
109+
mounts := BypathMountsForPci(ReadPciAddressForCard(drmPath), byPath)
110+
111+
if len(mounts) != td.mountCount {
112+
t.Errorf("Wrong number of mounts %d vs. %d", len(mounts), td.mountCount)
113+
}
114+
115+
absPaths := []string{}
116+
for _, link := range td.bypathFiles {
117+
absPaths = append(absPaths, path.Join(byPath, link))
118+
}
119+
120+
for _, mount := range mounts {
121+
if !slices.Contains(absPaths, mount.ContainerPath) {
122+
t.Errorf("containerpath is incorrect: %s", mount.ContainerPath)
123+
}
124+
125+
if !slices.Contains(absPaths, mount.HostPath) {
126+
t.Errorf("hostpath is incorrect: %s", mount.HostPath)
127+
}
128+
}
129+
}
130+
}

0 commit comments

Comments
 (0)