diff --git a/README.md b/README.md index bc81fad78..68ad9fce3 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ The FPGA plugin comes as three parts. - the [device plugin](#device-plugin) - the [admission controller](#admission-controller) -- the [CRIO-O prestart hook](#cri-o-prestart-hook) +- the [OCI createRuntime hook](#oci-createruntime-hook) Refer to each individual sub-components documentation for more details. Brief overviews of the sub-components are below. @@ -88,9 +88,9 @@ is responsible for performing mapping from user-friendly function IDs to the Interface ID and Bitstream ID that are required for FPGA programming. It also implements access control by namespacing FPGA configuration information. -#### CRI-O Prestart Hook +#### OCI createRuntime Hook -The [FPGA prestart CRI-O hook](cmd/fpga_crihook/README.md) performs discovery +The [FPGA OCI createRuntime hook](cmd/fpga_crihook/README.md) performs discovery of the requested FPGA function bitstream and programs FPGA devices based on the environment variables in the workload description. 
diff --git a/build/docker/intel-fpga-initcontainer.Dockerfile b/build/docker/intel-fpga-initcontainer.Dockerfile index cc0d4dbf8..9b5c736ea 100644 --- a/build/docker/intel-fpga-initcontainer.Dockerfile +++ b/build/docker/intel-fpga-initcontainer.Dockerfile @@ -66,13 +66,6 @@ RUN install -D ${DIR}/LICENSE /install_root/licenses/intel-device-plugins-for-ku --save_path /install_root/licenses/$CMD/go-licenses ; \ else mkdir -p /install_root/licenses/$CMD/go-licenses/ && cd licenses/$CMD && cp -r * /install_root/licenses/$CMD/go-licenses/ ; fi ### -ARG SRC_DIR=/usr/local/fpga-sw -ARG DST_DIR=/opt/intel/fpga-sw -RUN echo "{\n\ - \"hook\" : \"$DST_DIR/$CRI_HOOK\",\n\ - \"stage\" : [ \"prestart\" ],\n\ - \"annotation\": [ \"fpga.intel.com/region\" ]\n\ -}\n">>/install_root/$SRC_DIR/$CRI_HOOK.json ARG TOYBOX_VERSION="0.8.11" ARG TOYBOX_SHA256="83a3a88cbe1fa30f099c2f58295baef4637aaf988085aaea56e03aa29168175d" ARG ROOT=/install_root @@ -93,7 +86,7 @@ LABEL vendor='Intel®' LABEL version='devel' LABEL release='1' LABEL name='intel-fpga-initcontainer' -LABEL summary='Intel® FPGA programming CRI hook for Kubernetes' -LABEL description='The FPGA prestart CRI-O hook performs discovery of the requested FPGA function bitstream and programs FPGA devices based on the environment variables in the workload description' +LABEL summary='Intel® FPGA programming CDI hook for Kubernetes' +LABEL description='The FPGA OCI createRuntime hook performs discovery of the requested FPGA function bitstream and programs FPGA devices based on the environment variables in the workload description' COPY --from=builder /install_root / -ENTRYPOINT [ "/usr/bin/sh", "-c", "cp -a /usr/local/fpga-sw/* /opt/intel/fpga-sw/ && ln -sf /opt/intel/fpga-sw/intel-fpga-crihook.json /etc/containers/oci/hooks.d/" ] +ENTRYPOINT [ "/usr/bin/sh", "-c", "cp -a /usr/local/fpga-sw/* /opt/intel/fpga-sw/" ] diff --git a/build/docker/templates/intel-fpga-initcontainer.Dockerfile.in 
b/build/docker/templates/intel-fpga-initcontainer.Dockerfile.in index 198ed6a82..cf162cc3b 100644 --- a/build/docker/templates/intel-fpga-initcontainer.Dockerfile.in +++ b/build/docker/templates/intel-fpga-initcontainer.Dockerfile.in @@ -14,15 +14,6 @@ ARG CMD=fpga_tool ARG EP=/usr/local/fpga-sw/$CMD #include "default_build.docker" -ARG SRC_DIR=/usr/local/fpga-sw -ARG DST_DIR=/opt/intel/fpga-sw - -RUN echo "{\n\N - \"hook\" : \"$DST_DIR/$CRI_HOOK\",\n\N - \"stage\" : [ \"prestart\" ],\n\N - \"annotation\": [ \"fpga.intel.com/region\" ]\n\N -}\n">>/install_root/$SRC_DIR/$CRI_HOOK.json - #include "toybox_build.docker" FROM ${FINAL_BASE} @@ -30,9 +21,9 @@ FROM ${FINAL_BASE} #include "default_labels.docker" LABEL name='intel-fpga-initcontainer' -LABEL summary='Intel® FPGA programming CRI hook for Kubernetes' -LABEL description='The FPGA prestart CRI-O hook performs discovery of the requested FPGA function bitstream and programs FPGA devices based on the environment variables in the workload description' +LABEL summary='Intel® FPGA programming CDI hook for Kubernetes' +LABEL description='The FPGA OCI createRuntime hook performs discovery of the requested FPGA function bitstream and programs FPGA devices based on the environment variables in the workload description' COPY --from=builder /install_root / -ENTRYPOINT [ "/usr/bin/sh", "-c", "cp -a /usr/local/fpga-sw/* /opt/intel/fpga-sw/ && ln -sf /opt/intel/fpga-sw/intel-fpga-crihook.json /etc/containers/oci/hooks.d/" ] +ENTRYPOINT [ "/usr/bin/sh", "-c", "cp -a /usr/local/fpga-sw/* /opt/intel/fpga-sw/" ] diff --git a/cmd/dlb_plugin/dlb_plugin.go b/cmd/dlb_plugin/dlb_plugin.go index d185da664..ba6ef70ef 100644 --- a/cmd/dlb_plugin/dlb_plugin.go +++ b/cmd/dlb_plugin/dlb_plugin.go @@ -88,7 +88,7 @@ func (dp *DevicePlugin) scan() dpapi.DeviceTree { ContainerPath: file, Permissions: "rw", }} - deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devs, nil, nil, nil) + deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devs, 
nil, nil, nil, nil) sysfsDev := filepath.Join(dp.sysfsDir, filepath.Base(file)) sriovNumVFs := pluginutils.GetSriovNumVFs(sysfsDev) diff --git a/cmd/fpga_admissionwebhook/README.md b/cmd/fpga_admissionwebhook/README.md index 89daa3b0d..779177b36 100644 --- a/cmd/fpga_admissionwebhook/README.md +++ b/cmd/fpga_admissionwebhook/README.md @@ -23,7 +23,7 @@ devices to Kubernetes. The FPGA admission controller webhook is responsible for performing mapping from user-friendly function IDs to the Interface ID and Bitstream ID that are required for FPGA programming by -the [FPGA CRI-O hook](../fpga_crihook/README.md). +the [FPGA OCI createRuntime hook](../fpga_crihook/README.md). Mappings are stored in namespaced custom resource definition (CRD) objects, therefore the admission controller also performs access control, determining which bitstream can be used for which namespace. @@ -31,7 +31,7 @@ More details can be found in the [Mappings](#mappings) section. The admission controller also keeps the user from bypassing namespaced mapping restrictions, by denying admission of any pods that are trying to use internal knowledge of InterfaceID or -Bitstream ID environment variables used by the prestart hook. +Bitstream ID environment variables used by the createRuntime hook. ## Dependencies @@ -39,7 +39,7 @@ This component is one of a set of components that work together. You may also wa install the following: - [FPGA device plugin](../fpga_plugin/README.md) -- [FPGA prestart CRI-O hook](../fpga_crihook/README.md) +- [FPGA OCI createRuntime hook](../fpga_crihook/README.md) All components have the same basic dependencies as the [generic plugin framework dependencies](../../README.md#about) @@ -129,7 +129,7 @@ The same mapping, but with its mode field set to `region`, would translate and the corresponding AF IDs are set in environment variables for the container. 
Though in this case the cluster administrator would probably want to rename the mapping `arria10.dcp1.2-nlb0-preprogrammed` to something like `arria10.dcp1.2-nlb0-orchestrated` -to reflect its mode. The [FPGA CRI-O hook](../fpga_crihook/README.md) then loads the requested +to reflect its mode. The [FPGA OCI createRuntime hook](../fpga_crihook/README.md) then loads the requested bitstream to a region before the container is started. Mappings of resource names are configured with objects of `AcceleratorFunction` and @@ -183,4 +183,4 @@ and they are applicable to pods created in the corresponding namespaces. ## Next steps -Continue with [FPGA prestart CRI-O hook](../fpga_crihook/README.md). +Continue with [FPGA OCI createRuntime hook](../fpga_crihook/README.md). diff --git a/cmd/fpga_crihook/README.md b/cmd/fpga_crihook/README.md index 59c26fd8a..015f8cb3e 100644 --- a/cmd/fpga_crihook/README.md +++ b/cmd/fpga_crihook/README.md @@ -1,27 +1,26 @@ -# Intel FPGA prestart CRI-O webhook for Kubernetes +# Intel FPGA OCI createRuntime hook for Kubernetes Table of Contents * [Introduction](#introduction) * [Dependencies](#dependencies) -* [Configuring CRI-O](#configuring-cri-o) +* [Configuring CRI runtimes](#configuring-cri-runtimes) ## Introduction -The FPGA CRI-O webhook is one of the components used to add support for Intel FPGA +The FPGA CDI hook is one of the components used to add support for Intel FPGA devices to Kubernetes. -The FPGA prestart CRI-O hook is triggered by container annotations, such as set by the -[FPGA device plugin](../fpga_plugin/README.md). It performs discovery of the requested FPGA -function bitstream and then programs FPGA devices based on the environment variables -in the workload description. +The FPGA OCI createRuntime hook is passed by the [FPGA device plugin](../fpga_plugin/README.md) as +a CDI device attribute to the Kubelet and then to the CRI runtime. 
+It performs discovery of the requested FPGA function bitstream and then programs FPGA devices +based on the environment variables in the workload description. -The CRI-O prestart hook is only *required* when the -[FPGA admission webhook](../fpga_admissionwebhook/README.md) is configured for orchestration -programmed mode, and is benign (un-used) otherwise. +The hook is only *required* when the [FPGA admission webhook](../fpga_admissionwebhook/README.md) +is configured for orchestration programmed mode, and is benign (un-used) otherwise. -> **Note:** The fpga CRI-O webhook is usually installed by the same DaemonSet as the -> FPGA device plugin. If building and installing the CRI-O webhook by hand, it is +> **Note:** The fpga CDI hook is usually installed by the same DaemonSet as the +> FPGA device plugin. If building and installing the hook by hand, it is > recommended you reference the > [fpga plugin DaemonSet YAML](/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml ) for > more details. @@ -39,11 +38,8 @@ All components have the same basic dependencies as the See [the development guide](../../DEVEL.md) for details if you want to deploy a customized version of the CRI hook. -## Configuring CRI-O +## Configuring CRI runtimes -Recent versions of [CRI-O](https://github.com/cri-o/cri-o) are shipped with default configuration -file that prevents CRI-O to discover and configure hooks automatically. -For FPGA orchestration programmed mode, the OCI hooks are the key component. -Please ensure that your `/etc/crio/crio.conf` parameter `hooks_dir` is either unset -(to enable default search paths for OCI hooks configuration) or contains the directory -`/etc/containers/oci/hooks.d`. +CDI should be enabled for the CRI runtime to call the hook. 
CRI-O has it enabled by +default and for Containerd it should be enabled explicitly in its configuration file as +explained in the [CDI documentation](https://github.com/cncf-tags/container-device-interface?tab=readme-ov-file#how-to-configure-cdi) diff --git a/cmd/fpga_crihook/main.go b/cmd/fpga_crihook/main.go index b071de3f4..2fb81cdfc 100644 --- a/cmd/fpga_crihook/main.go +++ b/cmd/fpga_crihook/main.go @@ -32,9 +32,6 @@ const ( configJSON = "config.json" fpgaRegionEnvPrefix = "FPGA_REGION_" fpgaAfuEnvPrefix = "FPGA_AFU_" - - annotationName = "com.intel.fpga.mode" - annotationValue = "fpga.intel.com/region" ) // Stdin defines structure for standard JSONed input of the OCI platform hook. @@ -220,16 +217,6 @@ func getStdin(reader io.Reader) (*Stdin, error) { return nil, err } - // Check if device plugin annotation is set - if stdinJ.Annotations.ComIntelFpgaMode == "" { - return nil, errors.Errorf("annotation %s is not set", annotationName) - } - - // Check if device plugin annotation is set - if stdinJ.Annotations.ComIntelFpgaMode != annotationValue { - return nil, errors.Errorf("annotation %s has incorrect value '%s'", annotationName, stdinJ.Annotations.ComIntelFpgaMode) - } - if stdinJ.Bundle == "" { return nil, errors.New("'bundle' field is not set in the stdin JSON") } diff --git a/cmd/fpga_crihook/main_test.go b/cmd/fpga_crihook/main_test.go index d95594e3f..02f2d7373 100644 --- a/cmd/fpga_crihook/main_test.go +++ b/cmd/fpga_crihook/main_test.go @@ -73,16 +73,6 @@ func TestGetStdin(t *testing.T) { stdinJSON: "stdin-incorrect-JSON.json", expectedErr: true, }, - { - name: "no annotations", - stdinJSON: "stdin-no-annotations.json", - expectedErr: true, - }, - { - name: "annotation is not set", - stdinJSON: "stdin-incorrect-intel-annotation.json", - expectedErr: true, - }, } for _, tc := range tcases { t.Run(tc.name, func(t *testing.T) { diff --git a/cmd/fpga_crihook/testdata/config-correct.json b/cmd/fpga_crihook/testdata/config-correct.json index 
c79c0f890..b64236a45 100644 --- a/cmd/fpga_crihook/testdata/config-correct.json +++ b/cmd/fpga_crihook/testdata/config-correct.json @@ -52,14 +52,14 @@ } ], "hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/config-no-FPGA-devices.json b/cmd/fpga_crihook/testdata/config-no-FPGA-devices.json index 653fe05f5..e831e1d89 100644 --- a/cmd/fpga_crihook/testdata/config-no-FPGA-devices.json +++ b/cmd/fpga_crihook/testdata/config-no-FPGA-devices.json @@ -52,14 +52,14 @@ } ], "hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/config-no-afu.json b/cmd/fpga_crihook/testdata/config-no-afu.json index ecd4b098c..90ad32358 100644 --- a/cmd/fpga_crihook/testdata/config-no-afu.json +++ b/cmd/fpga_crihook/testdata/config-no-afu.json @@ -51,14 +51,14 @@ } ], "hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/config-no-devices.json b/cmd/fpga_crihook/testdata/config-no-devices.json index f3109756c..7fcaf4266 100644 --- a/cmd/fpga_crihook/testdata/config-no-devices.json +++ b/cmd/fpga_crihook/testdata/config-no-devices.json @@ -52,14 +52,14 @@ } ], "hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/config-no-env.json b/cmd/fpga_crihook/testdata/config-no-env.json index df4b2c302..c7242556c 100644 --- a/cmd/fpga_crihook/testdata/config-no-env.json +++ 
b/cmd/fpga_crihook/testdata/config-no-env.json @@ -42,14 +42,14 @@ } ], "hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/config-no-linux.json b/cmd/fpga_crihook/testdata/config-no-linux.json index 070caf0de..90c3ab8ff 100644 --- a/cmd/fpga_crihook/testdata/config-no-linux.json +++ b/cmd/fpga_crihook/testdata/config-no-linux.json @@ -52,14 +52,14 @@ } ], "hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/config-no-process.json b/cmd/fpga_crihook/testdata/config-no-process.json index 659c445a4..979898071 100644 --- a/cmd/fpga_crihook/testdata/config-no-process.json +++ b/cmd/fpga_crihook/testdata/config-no-process.json @@ -31,14 +31,14 @@ } ], "hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/config-no-region.json b/cmd/fpga_crihook/testdata/config-no-region.json index 766c77ceb..213586546 100644 --- a/cmd/fpga_crihook/testdata/config-no-region.json +++ b/cmd/fpga_crihook/testdata/config-no-region.json @@ -51,14 +51,14 @@ } ], "hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/config-non-existing-bitstream.json b/cmd/fpga_crihook/testdata/config-non-existing-bitstream.json index 70c767651..6e9ba1098 100644 --- a/cmd/fpga_crihook/testdata/config-non-existing-bitstream.json +++ b/cmd/fpga_crihook/testdata/config-non-existing-bitstream.json @@ -52,14 +52,14 @@ } ], 
"hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/config-region-afu-dont-match.json b/cmd/fpga_crihook/testdata/config-region-afu-dont-match.json index b7e072f77..4728f01a8 100644 --- a/cmd/fpga_crihook/testdata/config-region-afu-dont-match.json +++ b/cmd/fpga_crihook/testdata/config-region-afu-dont-match.json @@ -53,14 +53,14 @@ } ], "hooks": { - "prestart": [ + "createRuntime": [ { "path": "/usr/local/bin/fpga_crihook", "args": [ "/usr/local/bin/fpga_crihook" ], "env": [ - "stage=prestart" + "stage=createRuntime" ] } ] diff --git a/cmd/fpga_crihook/testdata/stdin-incorrect-intel-annotation.json b/cmd/fpga_crihook/testdata/stdin-incorrect-intel-annotation.json deleted file mode 100644 index ca977451f..000000000 --- a/cmd/fpga_crihook/testdata/stdin-incorrect-intel-annotation.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "annotations": { - "io.kubernetes.container.hash": "b202a1fa", - "io.kubernetes.container.name": "test-container", - "io.kubernetes.container.restartCount": "0", - "io.kubernetes.pod.name": "test-fpga-region", - "io.kubernetes.pod.namespace": "default", - "io.kubernetes.pod.terminationGracePeriod": "30", - "io.kubernetes.pod.uid": "942e94c1-72d3-11e8-b221-c81f66f62fcc", - "com.intel.fpga.mode": "incorrect value" - }, - "bundle": "testdata", - "id": "1c40dd8efd268a47d7fb9f75d00f50c20af49d07d8d3c5fb948e68abb6d5ecf9", - "ociVersion": "1.0.0", - "pid": 39638, - "status": "" -} diff --git a/cmd/fpga_crihook/testdata/stdin-no-annotations.json b/cmd/fpga_crihook/testdata/stdin-no-annotations.json deleted file mode 100644 index 704c5ce94..000000000 --- a/cmd/fpga_crihook/testdata/stdin-no-annotations.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "bundle": "testdata", - "id": "1c40dd8efd268a47d7fb9f75d00f50c20af49d07d8d3c5fb948e68abb6d5ecf9", - "ociVersion": "1.0.0", - "pid": 39638, - 
"status": "" -} diff --git a/cmd/fpga_plugin/README.md b/cmd/fpga_plugin/README.md index b726e461c..95d291e1e 100644 --- a/cmd/fpga_plugin/README.md +++ b/cmd/fpga_plugin/README.md @@ -47,7 +47,7 @@ Kubernetes: which can be used to dynamically convert logical resource names in pod specifications into actual FPGA resource names, as advertised by the device plugin. - The webhook can also set environment variables to instruct the CRI-O prestart hook to program the FPGA + The webhook can also set environment variables to instruct the OCI createRuntime hook to program the FPGA before launching the container. > **NOTE:** Installation of the [FPGA admission controller webhook](../fpga_admissionwebhook/README.md) can be skipped if the @@ -55,10 +55,12 @@ Kubernetes: > since it integrates the controller's functionality. > However, [the mappings](../fpga_admissionwebhook/README.md#mappings-deployment) still must be deployed." -- [FPGA CRI-O prestart hook](../fpga_crihook/README.md) +- [FPGA OCI createRuntime hook](../fpga_crihook/README.md) - A [CRI-O](https://github.com/cri-o/cri-o) prestart hook that, upon instruction from the FPGA admission - controller, allocates and programs the FPGA before the container is launched. + An [OCI](https://github.com/opencontainers/runtime-spec/blob/main/config.md#createRuntime-hooks) createRuntime hook that, + upon instruction from the FPGA admission controller, programs the FPGA before the container is launched. + The FPGA plugin uses [Container Device Interface](https://github.com/cncf-tags/container-device-interface) to pass the hook + to the Kubelet. The repository also contains an [FPGA helper tool](../fpga_tool/README.md) that may be useful during development, initial deployment and debugging. 
@@ -102,15 +104,14 @@ major components: - [FPGA device plugin](README.md) (this component) - [FPGA admission controller webhook](../fpga_admissionwebhook/README.md) -- [FPGA prestart CRI-O hook](../fpga_crihook/README.md) +- [FPGA OCI createRuntime hook](../fpga_crihook/README.md) -The CRI-O hook is only *required* if `region` mode is being used, but is installed by default by the +The CDI hook is only *required* if `region` mode is being used, but is installed by default by the [FPGA plugin DaemonSet YAML](/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml), and is benign in `af` mode. -If using the `af` mode, and therefore *not* using the -CRI-O prestart hook, runtimes other than CRI-O can be used (that is, the CRI-O hook presently -*only* works with the CRI-O runtime). +If using the `af` mode, and therefore *not* using the OCI createRuntime hook, any runtime can be used +(the hook itself requires a runtime with CDI support, which not all runtimes provide). The FPGA device plugin requires a Linux Kernel FPGA driver to be installed and enabled to operate. The plugin supports the use of either of following two drivers, and auto detects @@ -147,7 +148,7 @@ The following images are available on the Docker hub: - [The FPGA plugin](https://hub.docker.com/r/intel/intel-fpga-plugin) - [The FPGA admisson webhook](https://hub.docker.com/r/intel/intel-fpga-admissionwebhook) -- [The FPGA CRI-O prestart hook (in the `initcontainer` image)](https://hub.docker.com/r/intel/intel-fpga-initcontainer) +- [The FPGA OCI createRuntime hook (in the `initcontainer` image)](https://hub.docker.com/r/intel/intel-fpga-initcontainer) Depending on the FPGA mode, run either ```bash @@ -207,7 +208,7 @@ $ kubectl annotate node 'fpga.intel.com/device-plugin-mode=af' And restart the pods on the nodes. 
> **Note:** The FPGA plugin [DaemonSet YAML](/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml) -> also deploys the [FPGA CRI-O hook](../fpga_crihook/README.md) `initcontainer` image, but it will be +> also deploys the [FPGA OCI createRuntime hook](../fpga_crihook/README.md) `initcontainer` image, but it will be > benign (un-used) when running the FPGA plugin in `af` mode. #### Verify Plugin Registration @@ -223,6 +224,6 @@ fpga.intel.com/region-ce48969398f05f33946d560708be108a: 1 ``` > **Note:** The FPGA plugin [DaemonSet YAML](/deployments/fpga_plugin/fpga_plugin.yaml) -> also deploys the [FPGA CRI-O hook](../fpga_crihook/README.md) `initcontainer` image as well. You may +> also deploys the [FPGA OCI createRuntime hook](../fpga_crihook/README.md) `initcontainer` image as well. You may > also wish to build that image locally before deploying the FPGA plugin to avoid deploying > the Docker hub default image. diff --git a/cmd/fpga_plugin/dfl_test.go b/cmd/fpga_plugin/dfl_test.go index eb8b8207a..7403c0413 100644 --- a/cmd/fpga_plugin/dfl_test.go +++ b/cmd/fpga_plugin/dfl_test.go @@ -23,6 +23,7 @@ import ( dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin" "github.com/intel/intel-device-plugins-for-kubernetes/pkg/fpga" pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + cdispec "tags.cncf.io/container-device-interface/specs-go" ) func TestNewDevicePluginDFL(t *testing.T) { @@ -139,7 +140,7 @@ func TestGetRegionDevelTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -158,7 +159,7 @@ func TestGetRegionDevelTreeDFL(t *testing.T) { Permissions: "rw", }, } - 
expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -177,7 +178,7 @@ func TestGetRegionDevelTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "region3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "region3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, nil)) result := getRegionDevelTree(getDevicesDFL()) if !reflect.DeepEqual(result, expected) { @@ -187,6 +188,12 @@ func TestGetRegionDevelTreeDFL(t *testing.T) { func TestGetRegionTreeDFL(t *testing.T) { expected := dpapi.NewDeviceTree() + hooks := []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + } nodes := []pluginapi.DeviceSpec{ { HostPath: "/dev/dfl-port.0", @@ -194,7 +201,7 @@ func TestGetRegionTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, hooks)) nodes = []pluginapi.DeviceSpec{ { @@ -208,7 +215,7 @@ func TestGetRegionTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "region2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, hooks)) nodes = []pluginapi.DeviceSpec{ { @@ -222,7 +229,7 @@ func TestGetRegionTreeDFL(t *testing.T) { Permissions: "rw", }, } - 
expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "region3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "region3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, hooks)) result := getRegionTree(getDevicesDFL()) if !reflect.DeepEqual(result, expected) { @@ -239,7 +246,7 @@ func TestGetAfuTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "dfl-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "dfl-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -249,7 +256,7 @@ func TestGetAfuTreeDFL(t *testing.T) { }, } - expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "dfl-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "dfl-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -258,7 +265,7 @@ func TestGetAfuTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "dfl-port.2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "dfl-port.2", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -267,7 +274,7 @@ func TestGetAfuTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice("af-fff.fff.__________________________________________8", "dfl-port.3", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil)) + expected.AddDevice("af-fff.fff.__________________________________________8", "dfl-port.3", 
dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -276,7 +283,7 @@ func TestGetAfuTreeDFL(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice("af-fff.fff.__________________________________________8", "dfl-port.4", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil)) + expected.AddDevice("af-fff.fff.__________________________________________8", "dfl-port.4", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, nil)) result := getAfuTree(getDevicesDFL()) if !reflect.DeepEqual(result, expected) { diff --git a/cmd/fpga_plugin/fpga_plugin.go b/cmd/fpga_plugin/fpga_plugin.go index e8b44d33f..67d3ca686 100644 --- a/cmd/fpga_plugin/fpga_plugin.go +++ b/cmd/fpga_plugin/fpga_plugin.go @@ -24,6 +24,7 @@ import ( "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + cdispec "tags.cncf.io/container-device-interface/specs-go" dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin" "github.com/intel/intel-device-plugins-for-kubernetes/pkg/fpga" @@ -50,6 +51,10 @@ const ( // Period of device scans. scanPeriod = 5 * time.Second + + // CDI hook attributes. 
+ HookName = "createRuntime" + HookPath = "/opt/intel/fpga-sw/intel-fpga-crihook" ) type newPortFunc func(fname string) (fpga.Port, error) @@ -83,7 +88,7 @@ func getRegionDevelTree(devices []device) dpapi.DeviceTree { Permissions: "rw", } - regionTree.AddDevice(devType, region.id, dpapi.NewDeviceInfo(health, devNodes, nil, nil, nil)) + regionTree.AddDevice(devType, region.id, dpapi.NewDeviceInfo(health, devNodes, nil, nil, nil, nil)) } } @@ -112,7 +117,14 @@ func getRegionTree(devices []device) dpapi.DeviceTree { } } - regionTree.AddDevice(devType, region.id, dpapi.NewDeviceInfo(health, devNodes, nil, nil, nil)) + hooks := []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + } + + regionTree.AddDevice(devType, region.id, dpapi.NewDeviceInfo(health, devNodes, nil, nil, nil, hooks)) } } @@ -144,7 +156,7 @@ func getAfuTree(devices []device) dpapi.DeviceTree { Permissions: "rw", }, } - afuTree.AddDevice(devType, afu.id, dpapi.NewDeviceInfo(health, devNodes, nil, nil, nil)) + afuTree.AddDevice(devType, afu.id, dpapi.NewDeviceInfo(health, devNodes, nil, nil, nil, nil)) } } } diff --git a/cmd/fpga_plugin/opae_test.go b/cmd/fpga_plugin/opae_test.go index c469de848..35a8dc86b 100644 --- a/cmd/fpga_plugin/opae_test.go +++ b/cmd/fpga_plugin/opae_test.go @@ -23,6 +23,7 @@ import ( dpapi "github.com/intel/intel-device-plugins-for-kubernetes/pkg/deviceplugin" "github.com/intel/intel-device-plugins-for-kubernetes/pkg/fpga" pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + cdispec "tags.cncf.io/container-device-interface/specs-go" ) func TestNewDevicePluginOPAE(t *testing.T) { @@ -129,7 +130,7 @@ func TestGetRegionDevelTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, 
nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -143,7 +144,7 @@ func TestGetRegionDevelTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -157,7 +158,7 @@ func TestGetRegionDevelTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "intel-fpga-fme.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "intel-fpga-fme.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, nil)) result := getRegionDevelTree(getDevicesOPAE()) if !reflect.DeepEqual(result, expected) { @@ -167,6 +168,12 @@ func TestGetRegionDevelTreeOPAE(t *testing.T) { func TestGetRegionTreeOPAE(t *testing.T) { expected := dpapi.NewDeviceTree() + hooks := []*cdispec.Hook{ + { + HookName: HookName, + Path: HookPath, + }, + } nodes := []pluginapi.DeviceSpec{ { HostPath: "/dev/intel-fpga-port.0", @@ -174,7 +181,7 @@ func TestGetRegionTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, hooks)) nodes = []pluginapi.DeviceSpec{ { @@ -183,7 +190,7 @@ func TestGetRegionTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", "intel-fpga-fme.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-ce48969398f05f33946d560708be108a", 
"intel-fpga-fme.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, hooks)) nodes = []pluginapi.DeviceSpec{ { @@ -192,7 +199,7 @@ func TestGetRegionTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "intel-fpga-fme.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil)) + expected.AddDevice(regionMode+"-"+unhealthyInterfaceID, "intel-fpga-fme.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, hooks)) result := getRegionTree(getDevicesOPAE()) if !reflect.DeepEqual(result, expected) { @@ -209,7 +216,7 @@ func TestGetAfuTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "intel-fpga-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "intel-fpga-port.0", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -218,7 +225,7 @@ func TestGetAfuTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "intel-fpga-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil)) + expected.AddDevice("af-ce4.d84.zkiWk5jwXzOUbVYHCL4QithCTcSko8QT-J5DNoP5BAs", "intel-fpga-port.1", dpapi.NewDeviceInfo(pluginapi.Healthy, nodes, nil, nil, nil, nil)) nodes = []pluginapi.DeviceSpec{ { @@ -227,7 +234,7 @@ func TestGetAfuTreeOPAE(t *testing.T) { Permissions: "rw", }, } - expected.AddDevice("af-fff.fff.__________________________________________8", "intel-fpga-port.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil)) + expected.AddDevice("af-fff.fff.__________________________________________8", "intel-fpga-port.2", dpapi.NewDeviceInfo(pluginapi.Unhealthy, nodes, nil, nil, nil, nil)) result := getAfuTree(getDevicesOPAE()) if !reflect.DeepEqual(result, expected) { diff --git 
a/cmd/gpu_plugin/gpu_plugin.go b/cmd/gpu_plugin/gpu_plugin.go index 44c504263..5350c98e3 100644 --- a/cmd/gpu_plugin/gpu_plugin.go +++ b/cmd/gpu_plugin/gpu_plugin.go @@ -514,7 +514,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { mounts = dp.bypathMountsForPci(cardPath, name, dp.bypathDir) } - deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devSpecs, mounts, nil, nil) + deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devSpecs, mounts, nil, nil, nil) for i := 0; i < dp.options.sharedDevNum; i++ { devID := fmt.Sprintf("%s-%d", name, i) @@ -534,7 +534,7 @@ func (dp *devicePlugin) scan() (dpapi.DeviceTree, error) { // all Intel GPUs are under single monitoring resource per KMD if len(monitor) > 0 { for resourceName, devices := range monitor { - deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devices, nil, nil, nil) + deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devices, nil, nil, nil, nil) devTree.AddDevice(resourceName, monitorID, deviceInfo) } } diff --git a/cmd/qat_plugin/dpdkdrv/dpdkdrv.go b/cmd/qat_plugin/dpdkdrv/dpdkdrv.go index e4a9fd7c1..ba981d6a3 100644 --- a/cmd/qat_plugin/dpdkdrv/dpdkdrv.go +++ b/cmd/qat_plugin/dpdkdrv/dpdkdrv.go @@ -662,7 +662,7 @@ func (dp *DevicePlugin) scan() (dpapi.DeviceTree, error) { fmt.Sprintf("%s%d", envVarPrefix, n): vfBdf, } - devinfo := dpapi.NewDeviceInfo(healthiness, dp.getDpdkDeviceSpecs(dpdkDeviceName), dp.getDpdkMounts(dpdkDeviceName), envs, nil) + devinfo := dpapi.NewDeviceInfo(healthiness, dp.getDpdkDeviceSpecs(dpdkDeviceName), dp.getDpdkMounts(dpdkDeviceName), envs, nil, nil) devTree.AddDevice(cap, vfBdf, devinfo) } diff --git a/cmd/qat_plugin/kerneldrv/kerneldrv.go b/cmd/qat_plugin/kerneldrv/kerneldrv.go index cfed3021b..2bc1bc11d 100644 --- a/cmd/qat_plugin/kerneldrv/kerneldrv.go +++ b/cmd/qat_plugin/kerneldrv/kerneldrv.go @@ -102,7 +102,7 @@ func getDevTree(sysfs string, qatDevs []device, config map[string]section) (dpap // The rest should use QAT_SECTION_NAME_XXX variables. 
"QAT_SECTION_NAME": sname, } - deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devs, nil, envs, nil) + deviceInfo := dpapi.NewDeviceInfo(pluginapi.Healthy, devs, nil, envs, nil, nil) devTree.AddDevice(devType, fmt.Sprintf("%s_%s_%d", sname, ep.id, i), deviceInfo) uniqID++ diff --git a/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml b/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml index f68e12e6e..aa4d56239 100644 --- a/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml +++ b/deployments/fpga_plugin/base/intel-fpga-plugin-daemonset.yaml @@ -24,8 +24,6 @@ spec: volumeMounts: - mountPath: /opt/intel/fpga-sw name: intel-fpga-sw - - mountPath: /etc/containers/oci/hooks.d - name: oci-hooks-config containers: - name: intel-fpga-plugin env: @@ -50,6 +48,8 @@ spec: readOnly: true - name: kubeletsockets mountPath: /var/lib/kubelet/device-plugins + - name: cdidir + mountPath: /var/run/cdi volumes: - name: devfs hostPath: @@ -64,9 +64,9 @@ spec: hostPath: path: /opt/intel/fpga-sw type: DirectoryOrCreate - - name: oci-hooks-config + - name: cdidir hostPath: - path: /etc/containers/oci/hooks.d + path: /var/run/cdi type: DirectoryOrCreate nodeSelector: kubernetes.io/arch: amd64 diff --git a/go.mod b/go.mod index 47aa7cf9b..314ba700d 100644 --- a/go.mod +++ b/go.mod @@ -74,6 +74,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect github.com/opencontainers/go-digest v1.0.0 // indirect + github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78 // indirect github.com/prometheus/client_golang v1.18.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect github.com/spf13/cobra v1.7.0 // indirect @@ -122,6 +123,7 @@ require ( sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 
// indirect + tags.cncf.io/container-device-interface/specs-go v0.7.0 // indirect ) replace ( diff --git a/go.sum b/go.sum index 6ef248538..e251a9ade 100644 --- a/go.sum +++ b/go.sum @@ -148,6 +148,8 @@ github.com/onsi/gomega v1.33.1 h1:dsYjIxxSR755MDmKVsaFQTE22ChNBcuuTWgkUDSubOk= github.com/onsi/gomega v1.33.1/go.mod h1:U4R44UsT+9eLIaYRB2a5qajjtQYn0hauxvRm16AVYg0= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= +github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78 h1:R5M2qXZiK/mWPMT4VldCOiSL9HIAMuxQZWdG0CSM5+4= +github.com/opencontainers/runtime-spec v1.0.3-0.20220909204839-494a5a6aca78/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= @@ -367,3 +369,5 @@ sigs.k8s.io/structured-merge-diff/v4 v4.4.1 h1:150L+0vs/8DA78h1u02ooW1/fFq/Lwr+s sigs.k8s.io/structured-merge-diff/v4 v4.4.1/go.mod h1:N8hJocpFajUSSeSJ9bOZ77VzejKZaXsTtZo4/u7Io08= sigs.k8s.io/yaml v1.4.0 h1:Mk1wCc2gy/F0THH0TAp1QYyJNzRm2KCLy3o5ASXVI5E= sigs.k8s.io/yaml v1.4.0/go.mod h1:Ejl7/uTz7PSA4eKMyQCUTnhZYNmLIl+5c2lQPGR2BPY= +tags.cncf.io/container-device-interface/specs-go v0.7.0 h1:w/maMGVeLP6TIQJVYT5pbqTi8SCw/iHZ+n4ignuGHqg= +tags.cncf.io/container-device-interface/specs-go v0.7.0/go.mod h1:hMAwAbMZyBLdmYqWgYcKH0F/yctNpV3P35f+/088A80= diff --git a/pkg/controllers/fpga/controller_test.go b/pkg/controllers/fpga/controller_test.go index da7e06218..f4e2b5714 100644 --- a/pkg/controllers/fpga/controller_test.go +++ b/pkg/controllers/fpga/controller_test.go @@ -99,6 +99,10 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet MountPath: "/var/lib/kubelet/device-plugins", Name: 
"kubeletsockets", }, + { + MountPath: "/var/run/cdi", + Name: "cdidir", + }, }, }, }, @@ -116,10 +120,6 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet MountPath: "/opt/intel/fpga-sw", Name: "intel-fpga-sw", }, - { - MountPath: "/etc/containers/oci/hooks.d", - Name: "oci-hooks-config", - }, }, }, }, @@ -159,10 +159,10 @@ func (c *controller) newDaemonSetExpected(rawObj client.Object) *apps.DaemonSet }, }, { - Name: "oci-hooks-config", + Name: "cdidir", VolumeSource: v1.VolumeSource{ HostPath: &v1.HostPathVolumeSource{ - Path: "/etc/containers/oci/hooks.d", + Path: "/var/run/cdi", Type: &directoryOrCreate, }, }, diff --git a/pkg/deviceplugin/api.go b/pkg/deviceplugin/api.go index c9f420ff0..06d0e0557 100644 --- a/pkg/deviceplugin/api.go +++ b/pkg/deviceplugin/api.go @@ -19,6 +19,7 @@ import ( "github.com/intel/intel-device-plugins-for-kubernetes/pkg/topology" "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + cdispec "tags.cncf.io/container-device-interface/specs-go" ) // DeviceInfo contains information about device maintained by Device Plugin. @@ -29,6 +30,9 @@ type DeviceInfo struct { topology *pluginapi.TopologyInfo state string nodes []pluginapi.DeviceSpec + // Hooks can be passed only through CDI + // https://github.com/kubernetes/enhancements/tree/master/keps/sig-node/4009-add-cdi-devices-to-device-plugin-api + hooks []*cdispec.Hook } // UseDefaultMethodError allows the plugin to request running the default @@ -45,13 +49,14 @@ func init() { } // NewDeviceInfo makes DeviceInfo struct and adds topology information to it. 
-func NewDeviceInfo(state string, nodes []pluginapi.DeviceSpec, mounts []pluginapi.Mount, envs map[string]string, annotations map[string]string) DeviceInfo { +func NewDeviceInfo(state string, nodes []pluginapi.DeviceSpec, mounts []pluginapi.Mount, envs, annotations map[string]string, hooks []*cdispec.Hook) DeviceInfo { deviceInfo := DeviceInfo{ state: state, nodes: nodes, mounts: mounts, envs: envs, annotations: annotations, + hooks: hooks, } devPaths := []string{} diff --git a/pkg/deviceplugin/server.go b/pkg/deviceplugin/server.go index 7f02b79e8..773070b89 100644 --- a/pkg/deviceplugin/server.go +++ b/pkg/deviceplugin/server.go @@ -16,6 +16,8 @@ package deviceplugin import ( "context" + "encoding/json" + "fmt" "net" "os" "path" @@ -30,6 +32,7 @@ import ( "k8s.io/klog/v2" pluginapi "k8s.io/kubelet/pkg/apis/deviceplugin/v1beta1" + cdispec "tags.cncf.io/container-device-interface/specs-go" ) type serverState int @@ -39,6 +42,9 @@ const ( uninitialized serverState = iota serving terminating + CDIVersion = "0.5.0" // Kubernetes 1.27 / CRI-O 1.27 / Containerd 1.7 use this version. 
+ CDIKind = "intel.cdi.k8s.io/device" + CDIDir = "/var/run/cdi" ) // devicePluginServer maintains a gRPC server satisfying @@ -129,6 +135,37 @@ func (srv *server) ListAndWatch(empty *pluginapi.Empty, stream pluginapi.DeviceP return nil } +func generateCDIDevices(deviceID string, dev *DeviceInfo) ([]*pluginapi.CDIDevice, error) { + if len(dev.hooks) == 0 { + return nil, nil + } + + spec := cdispec.Spec{ + Version: CDIVersion, + Kind: CDIKind, + Devices: []cdispec.Device{ + { + Name: deviceID, + ContainerEdits: cdispec.ContainerEdits{ + Hooks: dev.hooks, + }, + }, + }, + } + + jsonSpec, err := json.Marshal(spec) + if err != nil { + return nil, err + } + + cdiFileName := path.Join(CDIDir, deviceID) + ".json" + if err = os.WriteFile(cdiFileName, jsonSpec, 0o600); err != nil { + return nil, err + } + + return []*pluginapi.CDIDevice{{Name: fmt.Sprintf("%s=%s", CDIKind, deviceID)}}, nil +} + func (srv *server) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest) (*pluginapi.AllocateResponse, error) { if srv.allocate != nil { response, err := srv.allocate(rqt) @@ -145,6 +182,7 @@ func (srv *server) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest) cresp.Envs = map[string]string{} cresp.Annotations = map[string]string{} + cresp.CDIDevices = []*pluginapi.CDIDevice{} for _, id := range crqt.DevicesIDs { dev, ok := srv.devices[id] @@ -171,6 +209,13 @@ func (srv *server) Allocate(ctx context.Context, rqt *pluginapi.AllocateRequest) for key, value := range dev.annotations { cresp.Annotations[key] = value } + + CDIDevices, err := generateCDIDevices(id, &dev) + if err != nil { + return nil, fmt.Errorf("device %s: cannot generate CDI device: %w", id, err) + } + + cresp.CDIDevices = append(cresp.CDIDevices, CDIDevices...) 
} response.ContainerResponses = append(response.ContainerResponses, cresp) diff --git a/pkg/idxd/plugin.go b/pkg/idxd/plugin.go index 0a5c55869..8163782e5 100644 --- a/pkg/idxd/plugin.go +++ b/pkg/idxd/plugin.go @@ -200,7 +200,7 @@ func (dp *DevicePlugin) scan() (dpapi.DeviceTree, error) { for i := 0; i < amount; i++ { deviceType := fmt.Sprintf("wq-%s-%s", wqType, wqMode) deviceID := fmt.Sprintf("%s-%s-%d", deviceType, wqName, i) - devTree.AddDevice(deviceType, deviceID, dpapi.NewDeviceInfo(pluginapi.Healthy, devNodes, nil, nil, nil)) + devTree.AddDevice(deviceType, deviceID, dpapi.NewDeviceInfo(pluginapi.Healthy, devNodes, nil, nil, nil, nil)) } }