Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
130 changes: 127 additions & 3 deletions pkg/cli/admin/mustgather/mustgather.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"os"
"path"
"regexp"
"sort"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -103,6 +104,10 @@ const (
concurrentMG = 4
// annotation to look for in ClusterServiceVersions and ClusterOperators when using --all-images
mgAnnotation = "operators.openshift.io/must-gather-image"

notReadyTaintKey = "node.kubernetes.io/not-ready"
unreachableTaintKey = "node.kubernetes.io/unreachable"
controlPlaneNodeRoleLabel = "node-role.kubernetes.io/control-plane"
)

func NewMustGatherCommand(f kcmdutil.Factory, streams genericiooptions.IOStreams) *cobra.Command {
Expand Down Expand Up @@ -476,6 +481,9 @@ func (o *MustGatherOptions) Run() error {
}

// ... and create must-gather pod(s)
candidateNames := getCandidateNodeNames(nodes, hasMaster)
affinity := buildNodeAffinity(candidateNames)

var pods []*corev1.Pod
for _, image := range o.Images {
_, err := imagereference.Parse(image)
Expand All @@ -496,7 +504,7 @@ func (o *MustGatherOptions) Run() error {
return err
}
for _, node := range nodes.Items {
pods = append(pods, o.newPod(node.Name, image, hasMaster))
pods = append(pods, o.newPod(node.Name, image, hasMaster, affinity))
}
} else {
if o.NodeName != "" {
Expand All @@ -506,7 +514,7 @@ func (o *MustGatherOptions) Run() error {
return err
}
}
pods = append(pods, o.newPod(o.NodeName, image, hasMaster))
pods = append(pods, o.newPod(o.NodeName, image, hasMaster, affinity))
}
}

Expand Down Expand Up @@ -924,7 +932,7 @@ func newClusterRoleBinding(ns *corev1.Namespace) *rbacv1.ClusterRoleBinding {
// newPod creates a pod with 2 containers with a shared volume mount:
// - gather: init containers that run gather command
// - copy: no-op container we can exec into
func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.Pod {
func (o *MustGatherOptions) newPod(node, image string, hasMaster bool, affinity *corev1.Affinity) *corev1.Pod {
zero := int64(0)

nodeSelector := map[string]string{
Expand Down Expand Up @@ -956,6 +964,7 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P
// so setting priority class to system-cluster-critical
PriorityClassName: "system-cluster-critical",
RestartPolicy: corev1.RestartPolicyNever,
Affinity: affinity,
Volumes: []corev1.Volume{
{
Name: "must-gather-output",
Expand Down Expand Up @@ -1058,6 +1067,121 @@ func (o *MustGatherOptions) newPod(node, image string, hasMaster bool) *corev1.P
return ret
}

func getNodeLastHeartbeatTime(node corev1.Node) *metav1.Time {
for _, cond := range node.Status.Conditions {
if cond.Type == corev1.NodeReady {
if !cond.LastHeartbeatTime.IsZero() {
return &cond.LastHeartbeatTime
}
return nil
}
}
return nil
}

func isNodeReadyByCondition(node corev1.Node) bool {
for _, cond := range node.Status.Conditions {
if cond.Type == corev1.NodeReady && cond.Status == corev1.ConditionTrue {
return true
}
}
return false
}

func isNodeReadyAndReachableByTaint(node corev1.Node) bool {
for _, taint := range node.Spec.Taints {
if taint.Key == unreachableTaintKey || taint.Key == notReadyTaintKey {
return false
}
}
return true
}

func getCandidateNodeNames(nodes *corev1.NodeList, hasMaster bool) []string {
var controlPlaneNodes, allControlPlaneNodes, workerNodes, unschedulableNodes, remainingNodes, selectedNodes []corev1.Node
for _, node := range nodes.Items {
if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok {
allControlPlaneNodes = append(allControlPlaneNodes, node)
}
if !isNodeReadyByCondition(node) || !isNodeReadyAndReachableByTaint(node) {
remainingNodes = append(remainingNodes, node)
continue
}
if node.Spec.Unschedulable {
unschedulableNodes = append(unschedulableNodes, node)
continue
}
if _, ok := node.Labels[controlPlaneNodeRoleLabel]; ok {
controlPlaneNodes = append(controlPlaneNodes, node)
} else {
workerNodes = append(workerNodes, node)
}
}

if hasMaster {
if len(controlPlaneNodes) > 0 {
selectedNodes = controlPlaneNodes
} else {
selectedNodes = allControlPlaneNodes
}
} else {
selectedNodes = controlPlaneNodes
if len(selectedNodes) == 0 {
selectedNodes = workerNodes
}
if len(selectedNodes) == 0 {
selectedNodes = unschedulableNodes
}
if len(selectedNodes) == 0 {
selectedNodes = remainingNodes
}
}

sort.SliceStable(selectedNodes, func(i, j int) bool {
iTime := getNodeLastHeartbeatTime(selectedNodes[i])
jTime := getNodeLastHeartbeatTime(selectedNodes[j])
if jTime == nil {
return true
}
if iTime == nil {
return false
}
return jTime.Before(iTime)
})

nodeNames := []string{}
for idx, n := range selectedNodes {
if idx >= 10 {
break
}
nodeNames = append(nodeNames, n.Name)
}
return nodeNames
}

func buildNodeAffinity(nodeHostnames []string) *corev1.Affinity {
if len(nodeHostnames) == 0 {
return nil
}
return &corev1.Affinity{
NodeAffinity: &corev1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &corev1.NodeSelector{
NodeSelectorTerms: []corev1.NodeSelectorTerm{
{
MatchExpressions: []corev1.NodeSelectorRequirement{
{
Key: "kubernetes.io/hostname",
Operator: corev1.NodeSelectorOpIn,
Values: nodeHostnames,
},
},
},
},
},
},
}
}

// BackupGathering is called if the full must-gather has an error. This is useful for making sure we get *something*
// no matter what has failed. It should be focused on universal openshift failures.
func (o *MustGatherOptions) BackupGathering(ctx context.Context, errs []error) {
Expand Down
25 changes: 20 additions & 5 deletions pkg/cli/rsync/copy_tar.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,15 +64,30 @@ func deleteContents(dir string) error {
return err
}
for _, f := range files {
// Sanitize the filename to prevent path traversal attacks
fileName := f.Name()
if strings.Contains(fileName, "..") || strings.Contains(fileName, "/") || strings.Contains(fileName, "\\") {
klog.V(4).Infof("Skipping potentially malicious filename: %s", fileName)
continue
}

// Ensure the resolved path is still within the target directory
targetPath := filepath.Join(dir, fileName)
cleanPath := filepath.Clean(targetPath)
if !strings.HasPrefix(cleanPath, filepath.Clean(dir)+string(filepath.Separator)) && cleanPath != filepath.Clean(dir) {
klog.V(4).Infof("Skipping path traversal attempt: %s", fileName)
continue
}

if f.IsDir() {
klog.V(5).Infof("Deleting directory: %s", f.Name())
err = os.RemoveAll(filepath.Clean(filepath.Join(dir, f.Name())))
klog.V(5).Infof("Deleting directory: %s", fileName)
err = os.RemoveAll(cleanPath)
} else {
klog.V(5).Infof("Deleting file: %s", f.Name())
err = os.Remove(filepath.Clean(filepath.Join(dir, f.Name())))
klog.V(5).Infof("Deleting file: %s", fileName)
err = os.Remove(cleanPath)
}
if err != nil {
klog.V(4).Infof("Error deleting file or directory: %s: %v", f.Name(), err)
klog.V(4).Infof("Error deleting file or directory: %s: %v", fileName, err)
return err
}
}
Expand Down