Skip to content

Commit 482ed7b

Browse files
authored
Merge pull request #939 from hj-johannes-lee/qat-allocation-policy
qat: implement preferredAllocation policies
2 parents 89a359e + d3c8063 commit 482ed7b

File tree

8 files changed

+172
-5
lines changed

8 files changed

+172
-5
lines changed

cmd/qat_plugin/README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ The QAT plugin can take a number of command line arguments, summarised in the fo
5353
| -kernel-vf-drivers | string | Comma separated VF Device Driver of the QuickAssist Devices in the system. Devices supported: DH895xCC, C62x, C3xxx, 4xxx, C4xxx and D15xx (default: `c6xxvf,4xxxvf`) |
5454
| -max-num-devices | int | maximum number of QAT devices to be provided to the QuickAssist device plugin (default: `32`) |
5555
| -mode | string | plugin mode which can be either `dpdk` or `kernel` (default: `dpdk`) |
56+
| -allocation-policy | string | Two possible values: `balanced` and `packed`. Balanced mode spreads the allocated QAT VF resources evenly among the QAT PF devices, and packed mode fills one QAT PF device with allocated QAT VF resources before allocating resources from the next QAT PF. (There is no default: when the flag is not set, neither policy is applied.) |
5657

5758
The plugin also accepts a number of other arguments related to logging. Please use the `-h` option to see
5859
the complete list of logging related options.

cmd/qat_plugin/dpdkdrv/dpdkdrv.go

Lines changed: 106 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@ package dpdkdrv
1717

1818
import (
1919
"bytes"
20+
"flag"
2021
"fmt"
2122
"os"
2223
"path/filepath"
24+
"sort"
2325
"strconv"
2426
"strings"
2527
"time"
@@ -62,11 +64,54 @@ var qatDeviceDriver = map[string]string{
6264
"6f55": "d15xxvf",
6365
}
6466

67+
// swapBDF reverses the order of the three colon-separated fields of every
// entry: ["A1:B1:C1", "A2:B2:C2"] becomes ["C1:B1:A1", "C2:B2:A2"]. Applying
// it twice yields the original values again.
//
// The input slice is never modified; a new slice of the same length is
// returned. Entries that do not consist of exactly three fields are copied
// unchanged, so a malformed ID can neither cause an index-out-of-range panic
// nor lose fields on a swap/swap-back round trip.
func swapBDF(devstrings []string) []string {
	result := make([]string, len(devstrings))

	for n, dev := range devstrings {
		parts := strings.Split(dev, ":")
		if len(parts) != 3 {
			// Not in the expected three-field form: pass it through as is.
			result[n] = dev

			continue
		}

		result[n] = parts[2] + ":" + parts[1] + ":" + parts[0]
	}

	return result
}
78+
79+
type preferredAllocationPolicyFunc func(*pluginapi.ContainerPreferredAllocationRequest) []string
80+
81+
// nonePolicy is used when no policy is specified.
82+
func nonePolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
83+
deviceIds := req.AvailableDeviceIDs
84+
85+
return deviceIds[:req.AllocationSize]
86+
}
87+
88+
// balancedPolicy is used for allocating QAT devices in balance.
89+
func balancedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
90+
// make it "FDB" and string sort and change back to "BDF"
91+
deviceIds := swapBDF(req.AvailableDeviceIDs)
92+
sort.Strings(deviceIds)
93+
deviceIds = swapBDF(deviceIds)
94+
95+
return deviceIds[:req.AllocationSize]
96+
}
97+
98+
// packedPolicy is used for allocating QAT PF devices one by one.
99+
func packedPolicy(req *pluginapi.ContainerPreferredAllocationRequest) []string {
100+
deviceIds := req.AvailableDeviceIDs
101+
sort.Strings(deviceIds)
102+
deviceIds = deviceIds[:req.AllocationSize]
103+
104+
return deviceIds
105+
}
106+
65107
// DevicePlugin represents vfio based QAT plugin.
66108
type DevicePlugin struct {
67109
scanTicker *time.Ticker
68110
scanDone chan bool
69111

112+
// Note: if the plugin is restarted with a new policy, the allocations of existing pods keep the old policy.
113+
policy preferredAllocationPolicyFunc
114+
70115
pciDriverDir string
71116
pciDeviceDir string
72117
dpdkDriver string
@@ -75,7 +120,7 @@ type DevicePlugin struct {
75120
}
76121

77122
// NewDevicePlugin returns new instance of vfio based QAT plugin.
78-
func NewDevicePlugin(maxDevices int, kernelVfDrivers string, dpdkDriver string) (*DevicePlugin, error) {
123+
func NewDevicePlugin(maxDevices int, kernelVfDrivers string, dpdkDriver string, preferredAllocationPolicy string) (*DevicePlugin, error) {
79124
if !isValidDpdkDeviceDriver(dpdkDriver) {
80125
return nil, errors.Errorf("wrong DPDK device driver: %s", dpdkDriver)
81126
}
@@ -87,10 +132,42 @@ func NewDevicePlugin(maxDevices int, kernelVfDrivers string, dpdkDriver string)
87132
}
88133
}
89134

90-
return newDevicePlugin(pciDriverDirectory, pciDeviceDirectory, maxDevices, kernelDrivers, dpdkDriver), nil
135+
allocationPolicyFunc := getAllocationPolicy(preferredAllocationPolicy)
136+
if allocationPolicyFunc == nil {
137+
return nil, errors.Errorf("wrong allocation policy: %s", preferredAllocationPolicy)
138+
}
139+
140+
return newDevicePlugin(pciDriverDirectory, pciDeviceDirectory, maxDevices, kernelDrivers, dpdkDriver, allocationPolicyFunc), nil
91141
}
92142

93-
func newDevicePlugin(pciDriverDir, pciDeviceDir string, maxDevices int, kernelVfDrivers []string, dpdkDriver string) *DevicePlugin {
143+
//getAllocationPolicy returns a func that fits the policy given as a parameter. It returns nonePolicy when the flag is not set, and it returns nil when the policy is not valid value.
144+
func getAllocationPolicy(preferredAllocationPolicy string) preferredAllocationPolicyFunc {
145+
switch {
146+
case !isFlagSet("allocation-policy"):
147+
return nonePolicy
148+
case preferredAllocationPolicy == "packed":
149+
return packedPolicy
150+
case preferredAllocationPolicy == "balanced":
151+
return balancedPolicy
152+
default:
153+
return nil
154+
}
155+
}
156+
157+
// isFlagSet reports whether the command-line flag called name has been set.
func isFlagSet(name string) bool {
	var found bool

	// flag.Visit only walks the flags that have been set, so a name match
	// here means the flag was set.
	flag.Visit(func(current *flag.Flag) {
		found = found || current.Name == name
	})

	return found
}
169+
170+
func newDevicePlugin(pciDriverDir, pciDeviceDir string, maxDevices int, kernelVfDrivers []string, dpdkDriver string, preferredAllocationPolicyFunc preferredAllocationPolicyFunc) *DevicePlugin {
94171
return &DevicePlugin{
95172
maxDevices: maxDevices,
96173
pciDriverDir: pciDriverDir,
@@ -99,6 +176,7 @@ func newDevicePlugin(pciDriverDir, pciDeviceDir string, maxDevices int, kernelVf
99176
dpdkDriver: dpdkDriver,
100177
scanTicker: time.NewTicker(scanPeriod),
101178
scanDone: make(chan bool, 1),
179+
policy: preferredAllocationPolicyFunc,
102180
}
103181
}
104182

@@ -143,6 +221,31 @@ func (dp *DevicePlugin) Scan(notifier dpapi.Notifier) error {
143221
}
144222
}
145223

224+
// Implement the PreferredAllocator interface.
225+
func (dp *DevicePlugin) GetPreferredAllocation(rqt *pluginapi.PreferredAllocationRequest) (*pluginapi.PreferredAllocationResponse, error) {
226+
response := &pluginapi.PreferredAllocationResponse{}
227+
228+
for _, req := range rqt.ContainerRequests {
229+
// Add a security check here. This should never happen unless there occurs error in kubelet device plugin manager.
230+
if req.AllocationSize > int32(len(req.AvailableDeviceIDs)) {
231+
var err = errors.Errorf("AllocationSize (%d) is greater than the number of available device IDs (%d)", req.AllocationSize, len(req.AvailableDeviceIDs))
232+
return nil, err
233+
}
234+
235+
IDs := dp.policy(req)
236+
klog.V(3).Infof("AvailableDeviceIDs: %q", req.AvailableDeviceIDs)
237+
klog.V(3).Infof("AllocatedDeviceIDs: %q", IDs)
238+
239+
resp := &pluginapi.ContainerPreferredAllocationResponse{
240+
DeviceIDs: IDs,
241+
}
242+
243+
response.ContainerResponses = append(response.ContainerResponses, resp)
244+
}
245+
246+
return response, nil
247+
}
248+
146249
func (dp *DevicePlugin) getDpdkDevice(vfBdf string) (string, error) {
147250
switch dp.dpdkDriver {
148251
case igbUio:

cmd/qat_plugin/dpdkdrv/dpdkdrv_test.go

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"flag"
1919
"os"
2020
"path"
21+
"reflect"
2122
"testing"
2223

2324
"github.com/pkg/errors"
@@ -95,7 +96,7 @@ func TestNewDevicePlugin(t *testing.T) {
9596
}
9697
for _, tt := range tcases {
9798
t.Run(tt.name, func(t *testing.T) {
98-
_, err := NewDevicePlugin(1, tt.kernelVfDrivers, tt.dpdkDriver)
99+
_, err := NewDevicePlugin(1, tt.kernelVfDrivers, tt.dpdkDriver, "")
99100

100101
if tt.expectedErr && err == nil {
101102
t.Errorf("Test case '%s': expected error", tt.name)
@@ -119,6 +120,46 @@ func (n *fakeNotifier) Notify(newDeviceTree dpapi.DeviceTree) {
119120
n.scanDone <- true
120121
}
121122

123+
func TestGetPreferredAllocation(t *testing.T) {
124+
rqt := &pluginapi.PreferredAllocationRequest{
125+
ContainerRequests: []*pluginapi.ContainerPreferredAllocationRequest{
126+
{
127+
AvailableDeviceIDs: []string{"0000:03:00.4", "0000:04:00.1", "0000:05:00.3", "0000:05:00.4", "0000:05:00.1", "0000:04:00.0", "0000:04:00.4", "0000:06:00.4", "0000:04:00.2", "0000:03:00.1", "0000:05:00.0", "0000:05:00.2", "0000:04:00.3", "0000:03:00.2", "0000:06:00.0", "0000:06:00.3", "0000:03:00.3", "0000:03:00.0", "0000:06:00.1", "0000:06:00.2"},
128+
AllocationSize: 4,
129+
},
130+
},
131+
}
132+
133+
plugin := newDevicePlugin("", "", 4, []string{""}, "", nonePolicy)
134+
response, _ := plugin.GetPreferredAllocation(rqt)
135+
136+
if !reflect.DeepEqual(response.ContainerResponses[0].DeviceIDs, []string{"0000:03:00.4", "0000:04:00.1", "0000:05:00.3", "0000:05:00.4"}) {
137+
t.Error("Unexpected return value for balanced preferred allocation")
138+
}
139+
140+
plugin = newDevicePlugin("", "", 4, []string{""}, "", packedPolicy)
141+
response, _ = plugin.GetPreferredAllocation(rqt)
142+
143+
if !reflect.DeepEqual(response.ContainerResponses[0].DeviceIDs, []string{"0000:03:00.0", "0000:03:00.1", "0000:03:00.2", "0000:03:00.3"}) {
144+
t.Error("Unexpected return value for balanced preferred allocation")
145+
}
146+
147+
plugin = newDevicePlugin("", "", 4, []string{""}, "", balancedPolicy)
148+
response, _ = plugin.GetPreferredAllocation(rqt)
149+
150+
if !reflect.DeepEqual(response.ContainerResponses[0].DeviceIDs, []string{"0000:03:00.0", "0000:04:00.0", "0000:05:00.0", "0000:06:00.0"}) {
151+
t.Error("Unexpected return value for balanced preferred allocation")
152+
}
153+
154+
rqt.ContainerRequests[0].AllocationSize = 32
155+
plugin = newDevicePlugin("", "", 4, []string{""}, "", nil)
156+
_, err := plugin.GetPreferredAllocation(rqt)
157+
158+
if err == nil {
159+
t.Error("Unexpected nil value return for err when AllocationSize is greater than the number of available device IDs")
160+
}
161+
}
162+
122163
func TestScan(t *testing.T) {
123164
tcases := []struct {
124165
name string
@@ -405,6 +446,7 @@ func TestScan(t *testing.T) {
405446
tt.maxDevNum,
406447
tt.kernelVfDrivers,
407448
tt.dpdkDriver,
449+
nil,
408450
)
409451

410452
fN := fakeNotifier{

cmd/qat_plugin/qat_plugin.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,13 @@ func main() {
4141

4242
dpdkDriver := flag.String("dpdk-driver", "vfio-pci", "DPDK Device driver for configuring the QAT device")
4343
kernelVfDrivers := flag.String("kernel-vf-drivers", "c6xxvf,4xxxvf", "Comma separated VF Device Driver of the QuickAssist Devices in the system. Devices supported: DH895xCC, C62x, C3xxx, C4xxx, 4xxx, and D15xx")
44+
preferredAllocationPolicy := flag.String("allocation-policy", "", "Modes of allocating QAT devices: balanced and packed")
4445
maxNumDevices := flag.Int("max-num-devices", 32, "maximum number of QAT devices to be provided to the QuickAssist device plugin")
4546
flag.Parse()
4647

4748
switch *mode {
4849
case "dpdk":
49-
plugin, err = dpdkdrv.NewDevicePlugin(*maxNumDevices, *kernelVfDrivers, *dpdkDriver)
50+
plugin, err = dpdkdrv.NewDevicePlugin(*maxNumDevices, *kernelVfDrivers, *dpdkDriver, *preferredAllocationPolicy)
5051
case "kernel":
5152
plugin = kerneldrv.NewDevicePlugin()
5253
default:

deployments/operator/crd/bases/deviceplugin.intel.com_qatdeviceplugins.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,14 @@ spec:
9494
description: NodeSelector provides a simple way to constrain device
9595
plugin pods to nodes with particular labels.
9696
type: object
97+
preferredAllocationPolicy:
98+
description: PreferredAllocationPolicy sets the mode of allocating
99+
QAT devices on a node. See documentation for detailed description
100+
of the policies.
101+
enum:
102+
- balanced
103+
- packed
104+
type: string
97105
type: object
98106
status:
99107
description: 'QatDevicePluginStatus defines the observed state of QatDevicePlugin.

pkg/apis/deviceplugin/v1/qatdeviceplugin_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,10 @@ type QatDevicePluginSpec struct {
3535

3636
// InitImage is a container image with a script that initialize devices.
3737
InitImage string `json:"initImage,omitempty"`
38+
// PreferredAllocationPolicy sets the mode of allocating QAT devices on a node.
39+
// See documentation for detailed description of the policies.
40+
// +kubebuilder:validation:Enum=balanced;packed
41+
PreferredAllocationPolicy string `json:"preferredAllocationPolicy,omitempty"`
3842

3943
// DpdkDriver is a DPDK device driver for configuring the QAT device.
4044
// +kubebuilder:validation:Enum=igb_uio;vfio-pci

pkg/controllers/qat/controller.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -280,5 +280,9 @@ func getPodArgs(qdp *devicepluginv1.QatDevicePlugin) []string {
280280
args = append(args, "-max-num-devices", "32")
281281
}
282282

283+
if qdp.Spec.PreferredAllocationPolicy != "" {
284+
args = append(args, "-allocation-policy", qdp.Spec.PreferredAllocationPolicy)
285+
}
286+
283287
return args
284288
}

test/envtest/qatdeviceplugin_controller_test.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ var _ = Describe("QatDevicePlugin Controller", func() {
9696
updatedDpdkDriver := "igb_uio"
9797
updatedKernelVfDrivers := "c3xxxvf"
9898
updatedMaxNumDevices := 16
99+
updatedPreferredAllocationPolicy := "balanced"
99100
updatedNodeSelector := map[string]string{"updated-qat-nodeselector": "true"}
100101

101102
fetched.Spec.Image = updatedImage
@@ -104,6 +105,7 @@ var _ = Describe("QatDevicePlugin Controller", func() {
104105
fetched.Spec.DpdkDriver = updatedDpdkDriver
105106
fetched.Spec.KernelVfDrivers = []devicepluginv1.KernelVfDriver{devicepluginv1.KernelVfDriver(updatedKernelVfDrivers)}
106107
fetched.Spec.MaxNumDevices = updatedMaxNumDevices
108+
fetched.Spec.PreferredAllocationPolicy = updatedPreferredAllocationPolicy
107109
fetched.Spec.NodeSelector = updatedNodeSelector
108110

109111
Expect(k8sClient.Update(context.Background(), fetched)).Should(Succeed())
@@ -126,6 +128,8 @@ var _ = Describe("QatDevicePlugin Controller", func() {
126128
updatedKernelVfDrivers,
127129
"-max-num-devices",
128130
strconv.Itoa(updatedMaxNumDevices),
131+
"-allocation-policy",
132+
updatedPreferredAllocationPolicy,
129133
}
130134

131135
Expect(ds.Spec.Template.Spec.Containers[0].Args).Should(ConsistOf(expectArgs))

0 commit comments

Comments
 (0)