Skip to content

Commit 6861ef5

Browse files
Merge pull request #1374 from tkatila/e2e-gpu-tf
e2e: gpu: add a basic tensorflow test
2 parents a70651f + 4212145 commit 6861ef5

File tree

4 files changed

+121
-2
lines changed

4 files changed

+121
-2
lines changed
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
# One-shot test pod for the GPU e2e suite: it runs /code/training.py once
# (restartPolicy: Never) with a single gpu.intel.com/i915 device.
1+
apiVersion: v1
2+
kind: Pod
3+
metadata:
4+
# Must match the tfPodName constant in test/e2e/gpu/gpu.go, which waits on this pod by name.
name: training-pod
5+
spec:
6+
restartPolicy: Never
7+
containers:
8+
# Must match the containerName constant in gpu.go, which is used to fetch this container's logs.
- name: testcontainer
9+
# The accompanying kustomization sets newTag 1.2.0-gpu for this image name,
# so ":latest" is only what a plain (non-kustomize) apply would use.
image: intel/intel-extension-for-tensorflow:latest
10+
imagePullPolicy: IfNotPresent
11+
securityContext:
12+
allowPrivilegeEscalation: false
13+
command: ["/bin/sh", "-c"]
14+
# The script comes from the "code" volume mounted at /code below.
args: ["python /code/training.py"]
15+
resources:
16+
limits:
17+
gpu.intel.com/i915: 1
18+
requests:
19+
gpu.intel.com/i915: 1
20+
volumeMounts:
21+
- mountPath: /code
22+
name: code
23+
volumes:
24+
# ConfigMap "training-code" is produced by the kustomization's configMapGenerator.
- configMap:
25+
name: training-code
26+
name: code
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
# Kustomization for the TensorFlow GPU e2e test; applied by the Go test with `kubectl apply -k`.
1+
# Packages training.py into the "training-code" ConfigMap that the pod mounts at /code.
configMapGenerator:
2+
- name: training-code
3+
files:
4+
- training.py
5+
6+
resources:
7+
# NOTE(review): presumably the file holding the training-pod Pod manifest — confirm.
- deployment.yaml
8+
9+
# Overrides the tag of the image referenced by the pod manifest (declared there as ":latest").
images:
10+
- name: intel/intel-extension-for-tensorflow
11+
newTag: 1.2.0-gpu
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Copyright 2018 The TensorFlow Authors.
2+
# Copyright 2023 Intel Corporation. All Rights Reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
16+
# original code from:
17+
# https://github.com/tensorflow/examples/blob/master/courses/udacity_intro_to_tensorflow_for_deep_learning/l02c01_celsius_to_fahrenheit.ipynb
18+
# this is slightly modified to run explicitly with XPU devices
19+
20+
import tensorflow as tf
import intel_extension_for_tensorflow as itex
import numpy as np

# Smoke test for an Intel XPU device: train a single-neuron
# Celsius -> Fahrenheit regression and check that predicting 100 C lands
# close to 212 F. Any failure raises, so the process exits non-zero.

print("BACKENDS: ", str(itex.get_backend()))

devs = tf.config.list_physical_devices('XPU')

print(devs)

# Fail fast when no XPU is visible instead of running on some other device.
if not devs:
    raise Exception("No devices found")

# NOTE(review): indentation was reconstructed from a flattened diff view;
# confirm that everything below belongs inside the device scope.
with tf.device("/xpu:0"):
    celsius_q = np.array([-40, -10, 0, 8, 15, 22, 38], dtype=float)
    fahrenheit_a = np.array([-40, 14, 32, 46, 59, 72, 100], dtype=float)

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(units=1, input_shape=[1])
    ])

    model.compile(loss='mean_squared_error',
                  optimizer=tf.keras.optimizers.Adam(0.1))

    # The training history is not needed; only the fitted weights matter.
    model.fit(celsius_q, fahrenheit_a, epochs=500, verbose=False)

    print("model trained")

    # Feed an ndarray rather than a bare Python list of scalars: an ndarray
    # is an accepted input type for predict(), while a plain list is not
    # guaranteed to be interpreted as a batch by every Keras release.
    test = np.array([100.0])
    p = np.asarray(model.predict(test))

    # Exactly one scalar is expected back (one sample, one output unit).
    if p.size != 1:
        raise Exception("invalid result obj")

    # Extract a real Python float so the range check and "%f" formatting do
    # not rely on implicit size-1 array -> scalar conversion.
    prediction = p.item()

    # 100 C is 212 F; allow +/- 1 F of training error.
    if 211 <= prediction <= 213:
        print("inference ok: %f" % prediction)
    else:
        raise Exception("bad prediction %f" % prediction)

print("SUCCESS")

test/e2e/gpu/gpu.go

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222

2323
"github.com/intel/intel-device-plugins-for-kubernetes/test/e2e/utils"
2424
"github.com/onsi/ginkgo/v2"
25+
"github.com/onsi/gomega"
2526
v1 "k8s.io/api/core/v1"
2627
"k8s.io/apimachinery/pkg/api/resource"
2728
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -35,8 +36,10 @@ import (
3536
)
3637

3738
const (
38-
kustomizationYaml = "deployments/gpu_plugin/kustomization.yaml"
39-
containerName = "testcontainer"
39+
kustomizationYaml = "deployments/gpu_plugin/kustomization.yaml"
40+
containerName = "testcontainer"
41+
tfKustomizationYaml = "deployments/gpu_tensorflow_test/kustomization.yaml"
42+
tfPodName = "training-pod"
4043
)
4144

4245
func init() {
@@ -118,5 +121,23 @@ func describe() {
118121

119122
framework.Logf("found card and renderD from the log")
120123
})
124+
125+
ginkgo.It("run a small workload on the GPU", func(ctx context.Context) {
126+
kustomYaml, err := utils.LocateRepoFile(tfKustomizationYaml)
127+
if err != nil {
128+
framework.Failf("unable to locate %q: %v", tfKustomizationYaml, err)
129+
}
130+
131+
ginkgo.By("submitting demo deployment")
132+
133+
e2ekubectl.RunKubectlOrDie(f.Namespace.Name, "apply", "-k", filepath.Dir(kustomYaml))
134+
135+
ginkgo.By("waiting the pod to finish")
136+
137+
err = e2epod.WaitForPodSuccessInNamespaceTimeout(ctx, f.ClientSet, tfPodName, f.Namespace.Name, 300*time.Second)
138+
gomega.Expect(err).To(gomega.BeNil(), utils.GetPodLogs(ctx, f, tfPodName, containerName))
139+
140+
framework.Logf("tensorflow execution succeeded!")
141+
})
121142
})
122143
}

0 commit comments

Comments
 (0)