OCPBUGS-57520: e2e: llc: initial tests for cpu allocation #1308

Merged 1 commit on Jun 18, 2025
272 changes: 267 additions & 5 deletions test/e2e/performanceprofile/functests/13_llc/llc.go
@@ -7,17 +7,23 @@ import (
"fmt"
"sort"
"strconv"
"strings"
"time"

. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/labels"

"github.com/jaypipes/ghw/pkg/cpu"
"github.com/jaypipes/ghw/pkg/topology"

"k8s.io/apimachinery/pkg/runtime"
"k8s.io/utils/cpuset"
"k8s.io/utils/ptr"
@@ -27,6 +33,7 @@ import (
machineconfigv1 "github.com/openshift/api/machineconfiguration/v1"

performancev2 "github.com/openshift/cluster-node-tuning-operator/pkg/apis/performanceprofile/v2"
"github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components"
profilecomponent "github.com/openshift/cluster-node-tuning-operator/pkg/performanceprofile/controller/performanceprofile/components/profile"
testutils "github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/cgroup"
@@ -41,6 +48,7 @@ import (
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/nodes"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/pods"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/poolname"

"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profiles"
"github.com/openshift/cluster-node-tuning-operator/test/e2e/performanceprofile/functests/utils/profilesupdate"
)
@@ -54,9 +62,35 @@ const (
deploymentDeletionTime = 1 * time.Minute
)

const (
// arbitrary value corresponding to the minimum group size we need. No known supported hardware has L3 groups this small; they are all much bigger
expectedMinL3GroupSize = 8
)

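// Machine mirrors the JSON emitted by the machineinfo tool: the ghw CPU and topology information for a node.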
type Machine struct {
CPU *cpu.Info `json:"cpu"`
Topology *topology.Info `json:"topology"`
}

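// CacheInfo describes a single cache group: the NUMA node it belongs to, the cache level, and the logical CPUs sharing it.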
type CacheInfo struct {
NodeID int
Level int
CPUs cpuset.CPUSet
}

func (ci CacheInfo) String() string {
return fmt.Sprintf("NUMANode=%d cacheLevel=%d cpus=<%s>", ci.NodeID, ci.Level, ci.CPUs.String())
}

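// MachineData bundles the raw machine info collected from a node with its precomputed last-level-cache (LLC) layout.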
type MachineData struct {
Info Machine
Caches []CacheInfo
}

var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.OpenShift), string(label.UnCoreCache)), Ordered, func() {
var (
workerRTNodes []corev1.Node
machineDatas map[string]MachineData // nodeName -> MachineData
initialProfile, perfProfile *performancev2.PerformanceProfile
performanceMCP string
err error
@@ -69,26 +103,43 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
)

BeforeAll(func() {
var hasMachineData bool
profileAnnotations = make(map[string]string)
ctx := context.Background()

workerRTNodes, err = nodes.GetByLabels(testutils.NodeSelectorLabels)
Expect(err).ToNot(HaveOccurred())

workerRTNodes, err = nodes.MatchingOptionalSelector(workerRTNodes)
Expect(err).ToNot(HaveOccurred(), fmt.Sprintf("error looking for the optional selector: %v", err))
Expect(err).ToNot(HaveOccurred(), "error looking for the optional selector: %v", err)

if len(workerRTNodes) < 1 {
Skip("need at least a worker node")
}

perfProfile, err = profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
Expect(err).ToNot(HaveOccurred())
initialProfile = perfProfile.DeepCopy()

By(fmt.Sprintf("collecting machine infos for %d nodes", len(workerRTNodes)))
machineDatas, hasMachineData, err = collectMachineDatas(ctx, workerRTNodes)
if !hasMachineData {
Skip("need machinedata available - please check the image for the presence of the machineinfo tool")
}
Expect(err).ToNot(HaveOccurred())

for node, data := range machineDatas {
testlog.Infof("node=%q data=%v", node, data.Caches)
}

getter, err = cgroup.BuildGetter(ctx, testclient.DataPlaneClient, testclient.K8sClient)
Expect(err).ToNot(HaveOccurred())
cgroupV2, err = cgroup.IsVersion2(ctx, testclient.DataPlaneClient)
Expect(err).ToNot(HaveOccurred())
if !cgroupV2 {
Skip("prefer-align-cpus-by-uncorecache cpumanager policy options is supported in cgroupv2 configuration only")
}

performanceMCP, err = mcps.GetByProfile(perfProfile)
Expect(err).ToNot(HaveOccurred())

@@ -116,16 +167,17 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
// Apply Annotation to enable align-cpu-by-uncorecache cpumanager policy option
if perfProfile.Annotations == nil || perfProfile.Annotations["kubeletconfig.experimental"] != llcPolicy {
testlog.Info("Enable align-cpus-by-uncorecache cpumanager policy")
perfProfile.Annotations = profileAnnotations
prof := perfProfile.DeepCopy()
prof.Annotations = profileAnnotations

By("updating performance profile")
profiles.UpdateWithRetry(perfProfile)
profiles.UpdateWithRetry(prof)

By(fmt.Sprintf("Applying changes in performance profile and waiting until %s will start updating", poolName))
profilesupdate.WaitForTuningUpdating(ctx, perfProfile)
profilesupdate.WaitForTuningUpdating(ctx, prof)

By(fmt.Sprintf("Waiting when %s finishes updates", poolName))
profilesupdate.WaitForTuningUpdated(ctx, perfProfile)
profilesupdate.WaitForTuningUpdated(ctx, prof)
}
})

@@ -215,6 +267,7 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
})
})
})

Context("Functional Tests with SMT Enabled", func() {
var (
L3CacheGroupSize int
@@ -854,6 +907,81 @@ var _ = Describe("[rfe_id:77446] LLC-aware cpu pinning", Label(string(label.Open
Entry("[test_id:81673] requesting less than L3Cache group size cpus", "test-deployment2", podCpuResourceLessthanL3CacheSize),
)
})

Context("Runtime Tests", func() {
var (
targetNodeName string // pick a single node to simplify our testing - e.g. so we know the expected L3 group size ahead of time
targetNodeInfo MachineData // shortcut. Note: **SHALLOW COPY**
targetL3GroupSize int

testPod *corev1.Pod
)

BeforeEach(func() {
targetNodeName = workerRTNodes[0].Name // arbitrarily pick the first worker node
var ok bool
targetNodeInfo, ok = machineDatas[targetNodeName]
Expect(ok).To(BeTrue(), "unknown machine data for node %q", targetNodeName)

targetL3GroupSize = expectedL3GroupSize(targetNodeInfo)
// arbitrary value corresponding to the minimum group size we need. No known supported hardware has L3 groups this small; they are all much bigger
Expect(targetL3GroupSize).Should(BeNumerically(">", expectedMinL3GroupSize), "L3 Group size too small: %d", targetL3GroupSize)
})

// TODO move to DeferCleanup?
AfterEach(func() {
if testPod == nil {
return
}
ctx := context.Background()
testlog.Infof("deleting pod %q", testPod.Name)
deleteTestPod(ctx, testPod)
})

It("[test_id:77727] should align containers which request less than a L3 group size exclusive CPUs", func(ctx context.Context) {
askingCPUs := expectedMinL3GroupSize

By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
testPod = makeLLCPod(targetNodeName, askingCPUs)
Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())

By("Waiting for the guaranteed pod to be ready")
testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")

logs, err := pods.GetLogs(testclient.K8sClient, testPod)
Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")

allocatedCPUs, err := cpuset.Parse(logs)
Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)

ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
Expect(ok).To(BeTrue(), "pod does not have L3-aligned CPUs") // TODO log what?
})

It("[test_id:81669] cannot align containers which request more than a L3 group size exclusive CPUs", func(ctx context.Context) {
askingCPUs := targetL3GroupSize + 2 // TODO: to be really safe we should add SMT level cpus

By(fmt.Sprintf("Creating a test pod asking for %d exclusive CPUs", askingCPUs))
testPod = makeLLCPod(targetNodeName, askingCPUs)
Expect(testclient.Client.Create(ctx, testPod)).To(Succeed())

By("Waiting for the guaranteed pod to be ready")
testPod, err = pods.WaitForCondition(ctx, client.ObjectKeyFromObject(testPod), corev1.PodReady, corev1.ConditionTrue, 5*time.Minute)
Expect(err).ToNot(HaveOccurred(), "Guaranteed pod did not become ready in time")

logs, err := pods.GetLogs(testclient.K8sClient, testPod)
Expect(err).ToNot(HaveOccurred(), "Cannot get logs from test pod")

allocatedCPUs, err := cpuset.Parse(logs)
Expect(err).ToNot(HaveOccurred(), "Cannot get cpuset for pod %s/%s from logs %q", testPod.Namespace, testPod.Name, logs)
Expect(allocatedCPUs.Size()).To(Equal(askingCPUs), "asked %d exclusive CPUs got %v", askingCPUs, allocatedCPUs)

ok, _ := isCPUSetLLCAligned(targetNodeInfo.Caches, allocatedCPUs)
Expect(ok).To(BeFalse(), "pod exceeds L3 group capacity so it cannot have L3-aligned CPUs") // TODO log what?
})
})
})

// create the MachineConfig that creates the text file required to enable the prefer-align-cpus-by-uncorecache policy option
@@ -995,3 +1123,137 @@ func transformToNoSMT(input map[int]map[int][]int) map[int][]int {
}
return result
}

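// MachineFromJSON decodes the JSON output of the machineinfo tool into a Machine.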
func MachineFromJSON(data string) (Machine, error) {
ma := Machine{}
rd := strings.NewReader(data)
err := json.NewDecoder(rd).Decode(&ma)
return ma, err
}

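// isCPUSetLLCAligned reports whether cpus fits entirely within a single cache group, returning the matching CacheInfo when it does.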
func isCPUSetLLCAligned(infos []CacheInfo, cpus cpuset.CPUSet) (bool, *CacheInfo) {
for idx := range infos {
info := &infos[idx]
if cpus.IsSubsetOf(info.CPUs) {
return true, info
}
}
return false, nil
}

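// computeLLCLayout walks the machine topology and returns one CacheInfo per level-3 (or higher) cache found on each NUMA node.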
func computeLLCLayout(mi Machine) []CacheInfo {
ret := []CacheInfo{}
for _, node := range mi.Topology.Nodes {
for _, cache := range node.Caches {
if cache.Level < 3 { // TODO
continue
}
ret = append(ret, CacheInfo{
NodeID: node.ID,
Level: int(cache.Level),
CPUs: cpusetFromLogicalProcessors(cache.LogicalProcessors...),
})
}
}
return ret
}

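// cpusetFromLogicalProcessors converts ghw logical processor IDs into a cpuset.CPUSet.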
func cpusetFromLogicalProcessors(procs ...uint32) cpuset.CPUSet {
cpuList := make([]int, 0, len(procs))
for _, proc := range procs {
cpuList = append(cpuList, int(proc))
}
return cpuset.New(cpuList...)
}

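// expectedL3GroupSize returns the CPU count of the first L3 cache group found, or 0 if the node exposes none.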
func expectedL3GroupSize(md MachineData) int {
// TODO: we assume all L3 Groups are equal in size.
for idx := range md.Caches {
cache := &md.Caches[idx]
if cache.Level != 3 {
continue
}
return cache.CPUs.Size()
}
return 0
}

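// collectMachineDatas runs the machineinfo tool on every node in nodeList and returns the parsed data keyed by node name.
// The boolean is false when the tool could not be executed on a node, so callers can Skip instead of failing.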
func collectMachineDatas(ctx context.Context, nodeList []corev1.Node) (map[string]MachineData, bool, error) {
cmd := []string{"/usr/bin/machineinfo"}
infos := make(map[string]MachineData)
for idx := range nodeList {
node := &nodeList[idx]
out, err := nodes.ExecCommand(ctx, node, cmd)
if err != nil {
return infos, false, err
}

info, err := MachineFromJSON(string(out))
if err != nil {
return infos, true, err
}

infos[node.Name] = MachineData{
Info: info,
Caches: computeLLCLayout(info), // precompute
}
}
return infos, true, nil
}

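// makeLLCPod builds a test pod pinned to nodeName, using the performance profile's runtime class, which prints its
// effective cpuset and then sleeps. When guaranteedCPUs > 0 the container requests that many exclusive CPUs,
// making the pod guaranteed QoS.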
func makeLLCPod(nodeName string, guaranteedCPUs int) *corev1.Pod {
testPod := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "test-",
Labels: map[string]string{
"test": "",
},
Namespace: testutils.NamespaceTesting,
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{
{
Name: "test",
Image: images.Test(),
Command: []string{
"/bin/sh", "-c", "cat /sys/fs/cgroup/cpuset.cpus.effective && sleep 10h",
},
},
},
NodeName: nodeName,
NodeSelector: map[string]string{
testutils.LabelHostname: nodeName,
},
},
}
if guaranteedCPUs > 0 {
testPod.Spec.Containers[0].Resources = corev1.ResourceRequirements{
Limits: corev1.ResourceList{
corev1.ResourceCPU: *resource.NewQuantity(int64(guaranteedCPUs), resource.DecimalSI),
corev1.ResourceMemory: resource.MustParse("256Mi"),
},
}
}
profile, _ := profiles.GetByNodeLabels(testutils.NodeSelectorLabels)
runtimeClass := components.GetComponentName(profile.Name, components.ComponentNamePrefix)
testPod.Spec.RuntimeClassName = &runtimeClass
return testPod
}

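// deleteTestPod deletes testpod and waits for its removal, returning false if the pod was already gone.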
func deleteTestPod(ctx context.Context, testpod *corev1.Pod) bool {
GinkgoHelper()

// it is possible that the pod was already deleted as part of the test; in this case we want to skip the teardown
err := testclient.DataPlaneClient.Get(ctx, client.ObjectKeyFromObject(testpod), testpod)
if apierrors.IsNotFound(err) {
return false
}

err = testclient.DataPlaneClient.Delete(ctx, testpod)
Expect(err).ToNot(HaveOccurred())

err = pods.WaitForDeletion(ctx, testpod, pods.DefaultDeletionTimeout*time.Second)
Expect(err).ToNot(HaveOccurred())

return true
}