e2e: llc: initial tests for cpu allocation #1308

Open · wants to merge 1 commit into base: main

293 changes: 280 additions & 13 deletions test/e2e/performanceprofile/functests/13_llc/llc.go
@@ -5,29 +5,42 @@ import (
"encoding/base64"
"encoding/json"
"fmt"
"strings"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

"github.com/jaypipes/ghw/pkg/cpu"
"github.com/jaypipes/ghw/pkg/topology"

corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/utils/cpuset"
"k8s.io/utils/ptr"

"sigs.k8s.io/controller-runtime/pkg/client"

igntypes "github.com/coreos/ignition/v2/config/v3_2/types"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

machineconfigv1 "github.com/openshift/api/machineconfiguration/v1"
performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
"github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components"
profilecomponent "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile"
testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"

testclient "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/client"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/images"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/label"
testlog "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/log"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/mcps"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes"

"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/pods"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/poolname"

"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profilesupdate"
)
@@ -39,10 +52,36 @@ const (
fileMode = 0420
)

const (
// arbitrary value corresponding to the minimum we need. No known supported hardware has L3 groups this small; they are all much bigger
expectedMinL3GroupSize = 8
)

type Machine struct {
CPU *cpu.Info `json:"cpu"`
Topology *topology.Info `json:"topology"`
}

type CacheInfo struct {
NodeID int
Level int
CPUs cpuset.CPUSet
}

func (ci CacheInfo) String() string {
return fmt.Sprintf("NUMANode=%d cacheLevel=%d cpus=<%s>", ci.NodeID, ci.Level, ci.CPUs.String())
}

type MachineData struct {
Info Machine
Caches []CacheInfo
}
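
// Illustrative sketch (not used by the tests, all values hypothetical): how the helper
// types above fit together for a single 8-CPU L3 group on NUMA node 0.
func exampleMachineData() MachineData {
    llc := CacheInfo{
        NodeID: 0,
        Level:  3,
        CPUs:   cpuset.New(0, 1, 2, 3, 4, 5, 6, 7),
    }
    // llc.String() renders as "NUMANode=0 cacheLevel=3 cpus=<0-7>"
    return MachineData{
        Info:   Machine{},
        Caches: []CacheInfo{llc},
    }
}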

var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.OpenShift)), Ordered, func() {
var (
workerRTNodes []corev1.Node
perfProfile *performancev2.PerformanceProfile
machineDatas map[string]MachineData // nodeName -> MachineData
perfProfile *performancev2.PerformanceProfile // original perf profile
performanceMCP string
err error
profileAnnotations map[string]string
@@ -52,18 +91,34 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
)

BeforeAll(func() {
var hasMachineData bool
profileAnnotations = make(map[string]string)
ctx := context.Background()

workerRTNodes, err = nodes.GetByLabels(testutils.NodeSelectorLabels)
Expect(err).ToNot(HaveOccurred())

workerRTNodes, err = nodes.MatchingOptionalSelector(workerRTNodes)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error looking for the optional selector: %v", err))
Expect(err).ToNot(HaveOccurred(), "error looking for the optional selector: %v", err)

if len(workerRTNodes) < 1 {
Skip("need at least a worker node")
}

perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
Expect(err).ToNot(HaveOccurred())

By(fmt.Sprintf("collecting machine infos for %d nodes", len(workerRTNodes)))
machineDatas, hasMachineData, err = collectMachineDatas(ctx, workerRTNodes)
if !hasMachineData {
Skip("need machinedata available - please check the image for the presence of the machineinfo tool")
}
Expect(err).ToNot(HaveOccurred())

for node, data := range machineDatas {
testlog.Infof("node=%q data=%v", node, data.Caches)
}

performanceMCP, err = mcps.GetByProfile(perfProfile)
Expect(err).ToNot(HaveOccurred())

@@ -91,35 +146,38 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
// Apply Annotation to enable align-cpu-by-uncorecache cpumanager policy option
if perfProfile.Annotations == nil || perfProfile.Annotations["kubeletconfig.experimental"] != llcPolicy {
testlog.Info("Enable align-cpus-by-uncorecache cpumanager policy")
perfProfile.Annotations = profileAnnotations
prof := perfProfile.DeepCopy()
prof.Annotations = profileAnnotations

By("updating performance profile")
profiles.UpdateWithRetry(perfProfile)
profiles.UpdateWithRetry(prof)

By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
profilesupdate.WaitForTuningUpdating(ctx, perfProfile)
profilesupdate.WaitForTuningUpdating(ctx, prof)

By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
profilesupdate.WaitForTuningUpdated(ctx, perfProfile)
profilesupdate.WaitForTuningUpdated(ctx, prof)
}

})
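
// Note on the annotation above: llcPolicy (defined elsewhere in this file) is the value of the
// "kubeletconfig.experimental" annotation. As an assumption for illustration only, it is expected
// to be a JSON fragment along the lines of
//   {"cpuManagerPolicyOptions": {"prefer-align-cpus-by-uncorecache": "true"}}
// which the operator merges into the generated KubeletConfig so the kubelet's static CPU manager
// prefers packing exclusive CPUs into a single uncore (L3) cache group.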

AfterAll(func() {
if perfProfile == nil {
return //nothing to do!
}

// Delete the machine config created to enable the uncore cache cpumanager policy option
// first make sure the profile doesn't have the annotation
ctx := context.Background()
perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
perfProfile.Annotations = nil
prof := perfProfile.DeepCopy()
prof.Annotations = nil
By("updating performance profile")
profiles.UpdateWithRetry(perfProfile)
profiles.UpdateWithRetry(prof)

By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
profilesupdate.WaitForTuningUpdating(ctx, perfProfile)
profilesupdate.WaitForTuningUpdating(ctx, prof)

By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
profilesupdate.WaitForTuningUpdated(ctx, perfProfile)
profilesupdate.WaitForTuningUpdated(ctx, prof)

// delete the machine config pool
Expect(testclient.Client.Delete(ctx, mc)).To(Succeed())
@@ -191,6 +249,81 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
})
})
})

Context("Runtime Tests", func() {
var (
targetNodeName string // pick one node to simplify our testing - e.g. so the expected L3 group size is known ahead of time
targetNodeInfo MachineData // shortcut. Note: **SHALLOW COPY**
targetL3GroupSize int

testPod *corev1.Pod
)

BeforeEach(func() {
targetNodeName = workerRTNodes[0].Name // pick the first worker node; any node will do
var ok bool
targetNodeInfo, ok = machineDatas[targetNodeName]
Expect(ok).To(BeTrue(), "unknown machine data for node %q", targetNodeName)

targetL3GroupSize = expectedL3GroupSize(targetNodeInfo)
// arbitrary value corresponding to the minimum we need. No known supported hardware has L3 groups this small; they are all much bigger
Expect(targetL3GroupSize).Should(BeNumerically(">", expectedMinL3GroupSize), "L3 Group size too small: %d", targetL3GroupSize)
})

// TODO move to DeferCleanup?
AfterEach(func() {
if testPod == nil {
return
}
ctx := context.Background()
testlog.Infof("deleting pod %q", testPod.Name)
deleteTestPod(ctx, testPod)
})

It("should align containers which request less than a L3 group size exclusive CPUs", func(ctx context.Context) {
Review comment (Contributor): Can you add test case id 77727

askingCPUs := expectedMinL3GroupSize

By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
testPod = makePod(targetNodeName, askingCPUs)
Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())

By("Waiting for the guaranteed pod to be ready")
testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")

logs, err := pods.GetLogs(testclient.K8sClient, testPod)
Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")

allocatedCPUs, err := cpuset.Parse(logs)
Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)

ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
Expect(ok).To(BeTrue(), "pod does not have L3-aligned CPUs") // TODO log what?
})

It("cannot align containers which request more than a L3 group size exclusive CPUs", func(ctx context.Context) {
Review comment (Contributor): Can you add test_id: 81669

askingCPUs := targetL3GroupSize + 2 // TODO: to be really safe we should add SMT level cpus

By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
testPod = makePod(targetNodeName, askingCPUs)
Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())

By("Waiting for the guaranteed pod to be ready")
testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")

logs, err := pods.GetLogs(testclient.K8sClient, testPod)
Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")

allocatedCPUs, err := cpuset.Parse(logs)
Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)

ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
Expect(ok).To(BeFalse(), "pod exceeds L3 group capacity so it cannot have L3-aligned CPUs") // TODO log what?
})
})
})

// create the MachineConfig that writes the text file required to enable the prefer-align-cpus-by-uncorecache policy option
@@ -239,3 +372,137 @@ func addContent(ignitionConfig *igntypes.Config, content []byte, dst string, mod
},
})
}

func MachineFromJSON(data string) (Machine, error) {
ma := Machine{}
rd := strings.NewReader(data)
err := json.NewDecoder(rd).Decode(&ma)
return ma, err
}
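
// Illustrative sketch (assumption: the machineinfo tool emits ghw-style JSON whose top-level
// "cpu" and "topology" objects match the Machine struct tags above):
//
//  ma, err := MachineFromJSON(`{"cpu": {"total_cores": 8}, "topology": {"nodes": []}}`)
//  if err != nil { /* handle the error */ }
//  _ = ma.CPU.TotalCores // 8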

func isCPUSetLLCAligned(infos []CacheInfo, cpus cpuset.CPUSet) (bool, *CacheInfo) {
for idx := range infos {
info := &infos[idx]
if cpus.IsSubsetOf(info.CPUs) {
return true, info
}
}
return false, nil
}
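
// Illustrative sketch (hypothetical CPU numbering): a cpuset is LLC-aligned when it fits entirely
// inside a single L3 group. Note that computeLLCLayout below keeps only caches at level 3 and
// above, so each CacheInfo passed in here describes one L3 group.
func exampleAlignmentCheck() {
    caches := []CacheInfo{
        {NodeID: 0, Level: 3, CPUs: cpuset.New(0, 1, 2, 3, 4, 5, 6, 7)},
        {NodeID: 1, Level: 3, CPUs: cpuset.New(8, 9, 10, 11, 12, 13, 14, 15)},
    }
    aligned, _ := isCPUSetLLCAligned(caches, cpuset.New(2, 3))       // true: subset of node 0's L3 group
    spilled, _ := isCPUSetLLCAligned(caches, cpuset.New(6, 7, 8, 9)) // false: spans two L3 groups
    testlog.Infof("aligned=%v spilled=%v", aligned, spilled)
}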

func computeLLCLayout(mi Machine) []CacheInfo {
ret := []CacheInfo{}
for _, node := range mi.Topology.Nodes {
for _, cache := range node.Caches {
if cache.Level < 3 { // TODO
continue
}
ret = append(ret, CacheInfo{
NodeID: node.ID,
Level: int(cache.Level),
CPUs: cpusetFromLogicalProcessors(cache.LogicalProcessors...),
})
}
}
return ret
}

func cpusetFromLogicalProcessors(procs ...uint32) cpuset.CPUSet {
cpuList := make([]int, 0, len(procs))
for _, proc := range procs {
cpuList = append(cpuList, int(proc))
}
return cpuset.New(cpuList...)
}

func expectedL3GroupSize(md MachineData) int {
// TODO: we assume all L3 Groups are equal in size.
for idx := range md.Caches {
cache := &md.Caches[idx]
if cache.Level != 3 {
continue
}
return cache.CPUs.Size()
}
return 0
}

func collectMachineDatas(ctx context.Context, nodeList []corev1.Node) (map[string]MachineData, bool, error) {
cmd := []string{"/usr/bin/machineinfo"}
infos := make(map[string]MachineData)
for idx := range nodeList {
node := &nodeList[idx]
out, err := nodes.ExecCommand(ctx, node, cmd)
if err != nil {
return infos, false, err
}

info, err := MachineFromJSON(string(out))
if err != nil {
return infos, true, err
}

infos[node.Name] = MachineData{
Info: info,
Caches: computeLLCLayout(info), // precompute
}
}
return infos, true, nil
}
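
// Note: the boolean returned by collectMachineDatas reports whether machine data could be gathered
// at all; it is false only when running the machineinfo tool itself failed (e.g. the binary is
// missing from the test image), which is what lets BeforeAll Skip instead of Fail.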

func makePod(nodeName string, guaranteedCPUs int) *corev1.Pod {
testPod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "test-",
Labels: map[string]string{
"test": "",
},
Namespace: testutils.NamespaceTesting,
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
Image: images.Test(),
Command: []string{
"/bin/sh", "-c", "cat /sys/fs/cgroup/cpuset.cpus.effective && sleep 10h",
},
},
},
NodeName: nodeName,
NodeSelector: map[string]string{
testutils.LabelHostname: nodeName,
},
},
}
if guaranteedCPUs > 0 {
testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: *resource.NewQuantity(int64(guaranteedCPUs), resource.DecimalSI),
corev1.ResourceMemory: resource.MustParse("256Mi"),
},
}
}
profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
testPod.Spec.RuntimeClassName = &runtimeClass
return testPod
}
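
// Design note: only Limits are set above, so Kubernetes defaults Requests to the same values and
// the pod lands in the Guaranteed QoS class with an integer CPU count - the precondition for the
// static CPU manager to grant exclusive CPUs. The container prints its effective cpuset
// (/sys/fs/cgroup/cpuset.cpus.effective) as its first log line, which the tests parse with
// cpuset.Parse.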

func deleteTestPod(ctx context.Context, testpod *corev1.Pod) bool {
GinkgoHelper()

// it is possible that the pod was already deleted as part of the test; in this case we want to skip teardown
err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), testpod)
if apierrors.IsNotFound(err) {
return false
}

err = testclient.DataPlaneClient.Delete(ctx, testpod)
Expect(err).ToNot(HaveOccurred())

err = pods.WaitForDeletion(ctx, testpod, pods.DefaultDeletionTimeout*time.Second)
Expect(err).ToNot(HaveOccurred())

return true
}