From 6274768670b175c1f03fb33e4d0807a4561f2126 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Tue, 20 Jun 2023 17:27:17 +0300 Subject: [PATCH 01/23] Updates to the controller logic (wip) --- .../queuejob/queuejob_controller_ex.go | 476 +++++++++--------- test/yaml/0002-aw-job-quota.yaml | 63 +++ 2 files changed, 314 insertions(+), 225 deletions(-) create mode 100644 test/yaml/0002-aw-job-quota.yaml diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index e303a2b69..93652f0c8 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -35,7 +35,8 @@ import ( "math" "math/rand" "reflect" - "runtime/debug" + + // "runtime/debug" "sort" "strconv" "strings" @@ -94,6 +95,7 @@ import ( clusterstateapi "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/clusterstate/api" clusterstatecache "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/clusterstate/cache" + apiErrors "k8s.io/apimachinery/pkg/api/errors" ) const ( @@ -457,7 +459,7 @@ func (qjm *XController) PreemptQueueJobs() { newjob.Status.QueueJobState = arbv1.AppWrapperCondFailed newjob.Status.Running = 0 updateNewJob = newjob.DeepCopy() - if err := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { + if _, err := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", aw.Namespace, aw.Name, err) } //cannot use cleanup AW, since it puts AW back in running state @@ -515,7 +517,7 @@ func (qjm *XController) PreemptQueueJobs() { updateNewJob = newjob.DeepCopy() } - if err := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { + if _, err := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", aw.Namespace, aw.Name, err) } if cleanAppWrapper { @@ -544,7 +546,7 @@ func (qjm *XController) preemptAWJobs(preemptAWs []*arbv1.AppWrapper) { continue } apiCacheAWJob.Status.CanRun = false - if err := qjm.updateEtcd(apiCacheAWJob, "preemptAWJobs - CanRun: false"); err != nil { + if _, err := qjm.updateEtcd(apiCacheAWJob, "preemptAWJobs - CanRun: false"); err != nil { klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", apiCacheAWJob.Namespace, apiCacheAWJob.Name, err) } @@ -1101,35 +1103,21 @@ func (qjm *XController) ScheduleNext() { // check if we have enough compute resources for it // if we have enough compute resources then we set the AllocatedReplicas to the total // amount of resources asked by the job - qj, err := qjm.qjqueue.Pop() + queueAW, err := qjm.qjqueue.Pop() if err != nil { klog.Errorf("[ScheduleNext] Cannot pop QueueJob from qjqueue! 
err=%#v", err) return // Try to pop qjqueue again } else { - klog.Infof("[ScheduleNext] activeQ.Pop %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), qj, qj.ResourceVersion, qj.Status) + klog.Infof("[ScheduleNext] activeQ.Pop %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", queueAW.Name, time.Now().Sub(queueAW.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), queueAW, queueAW.ResourceVersion, queueAW.Status) } - apiCacheAWJob, e := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) - // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level - if e != nil { - klog.Errorf("[ScheduleNext] Unable to get AW %s from API cache &aw=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) - return - } - // make sure qj has the latest information - if larger(apiCacheAWJob.ResourceVersion, qj.ResourceVersion) { - klog.V(10).Infof("[ScheduleNext] %s found more recent copy from cache &qj=%p qj=%+v", qj.Name, qj, qj) - klog.V(10).Infof("[ScheduleNext] %s found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", apiCacheAWJob.Name, apiCacheAWJob, apiCacheAWJob) - apiCacheAWJob.DeepCopyInto(qj) - } - qjm.schedulingAW.AtomicSet(qj) - // Re-compute SystemPriority for DynamicPriority policy if qjm.serverOption.DynamicPriority { klog.V(4).Info("[ScheduleNext] dynamic priority enabled") // Create newHeap to temporarily store qjqueue jobs for updating SystemPriority tempQ := newHeap(cache.MetaNamespaceKeyFunc, HigherSystemPriorityQJ) - qj.Status.SystemPriority = float64(qj.Spec.Priority) + qj.Spec.PrioritySlope*(time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time)).Seconds() - tempQ.Add(qj) + queueAW.Status.SystemPriority = float64(queueAW.Spec.Priority) + queueAW.Spec.PrioritySlope*(time.Now().Sub(queueAW.Status.ControllerFirstTimestamp.Time)).Seconds() + tempQ.Add(queueAW) for qjm.qjqueue.Length() > 0 { qjtemp, _ := qjm.qjqueue.Pop() qjtemp.Status.SystemPriority = float64(qjtemp.Spec.Priority) + qjtemp.Spec.PrioritySlope*(time.Now().Sub(qjtemp.Status.ControllerFirstTimestamp.Time)).Seconds() @@ -1153,90 +1141,89 @@ func (qjm *XController) ScheduleNext() { } // Retrieve HeadOfLine after priority update - qj, err = qjm.qjqueue.Pop() + queueAW, err = qjm.qjqueue.Pop() if err != nil { klog.V(3).Infof("[ScheduleNext] Cannot pop QueueJob from qjqueue! err=%#v", err) } else { - klog.V(3).Infof("[ScheduleNext] activeQ.Pop_afterPriorityUpdate %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), qj, qj.ResourceVersion, qj.Status) + klog.V(3).Infof("[ScheduleNext] activeQ.Pop_afterPriorityUpdate %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", queueAW.Name, + time.Now().Sub(queueAW.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), queueAW, queueAW.ResourceVersion, queueAW.Status) } - qjm.schedulingAW.AtomicSet(qj) } - if qj.Status.CanRun { - klog.V(4).Infof("[ScheduleNext] AppWrapper job: %s from prioirty queue is already scheduled. Ignoring request: Status=%+v\n", qj.Name, qj.Status) + if queueAW.Status.CanRun { + klog.V(4).Infof("[ScheduleNext] AppWrapper job: %s from priority queue is already scheduled. 
Ignoring request: Status=%+v\n", queueAW.Name, queueAW.Status) return } - apiCacheAppWrapper, err := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) + var currentAW arbv1.AppWrapper + err = qjm.getAppWrapper(queueAW.Namespace, queueAW.Name, ¤tAW) if err != nil { - klog.Errorf("[ScheduleNext] Fail get AppWrapper job: %s from the api cache, err=%#v", qj.Name, err) + klog.Errorf("[ScheduleNext] failed to get a fresh copy of the app wrapper '%s/%s', err=%#v", queueAW.Namespace, queueAW.Name, err) return } - if apiCacheAppWrapper.Status.CanRun { - klog.V(4).Infof("[ScheduleNext] AppWrapper job: %s from API is already scheduled. Ignoring request: Status=%+v\n", apiCacheAppWrapper.Name, apiCacheAppWrapper.Status) + qjm.schedulingAW.AtomicSet(¤tAW) + + if currentAW.Status.CanRun { + klog.V(4).Infof("[ScheduleNext] AppWrapper job: %s from API is already scheduled. Ignoring request: Status=%+v\n", queueAW.Name, queueAW.Status) return } - qj.Status.QueueJobState = arbv1.AppWrapperCondHeadOfLine - qjm.addOrUpdateCondition(qj, arbv1.AppWrapperCondHeadOfLine, v1.ConditionTrue, "FrontOfQueue.", "") + currentAW.Status.QueueJobState = arbv1.AppWrapperCondHeadOfLine + qjm.addOrUpdateCondition(¤tAW, arbv1.AppWrapperCondHeadOfLine, v1.ConditionTrue, "FrontOfQueue.", "") + + currentAW.Status.FilterIgnore = true // update QueueJobState only + updatedAW, err := qjm.updateEtcd(¤tAW, "ScheduleNext - setHOL") + if err != nil { + klog.Errorf("[ScheduleNext] failed to update in etcd the app wrapper '%s/%s', err=%#v", currentAW.Namespace, currentAW.Name, err) + return + } + updatedAW.DeepCopyInto(¤tAW) - qj.Status.FilterIgnore = true // update QueueJobState only - qjm.updateEtcd(qj, "ScheduleNext - setHOL") - qjm.qjqueue.AddUnschedulableIfNotPresent(qj) // working on qj, avoid other threads putting it back to activeQ + qjm.qjqueue.AddUnschedulableIfNotPresent(¤tAW) // working on qj, avoid other threads putting it back to activeQ - klog.V(4).Infof("[ScheduleNext] after Pop qjqLength=%d qj %s Version=%s activeQ=%t Unsched=%t Status=%+v", qjm.qjqueue.Length(), qj.Name, qj.ResourceVersion, qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj.Status) + klog.V(4).Infof("[ScheduleNext] after Pop qjqLength=%d qj %s Version=%s activeQ=%t Unsched=%t Status=%+v", qjm.qjqueue.Length(), currentAW.Name, currentAW.ResourceVersion, qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW.Status) if qjm.isDispatcher { - klog.Infof("[ScheduleNext] [Dispatcher Mode] Dispatch Next QueueJob: %s\n", qj.Name) + klog.Infof("[ScheduleNext] [Dispatcher Mode] Dispatch Next QueueJob: '%s/%s Status=%+v", currentAW.Namespace, currentAW.Name, currentAW.Status) } else { - klog.Infof("[ScheduleNext] [Agent Mode] Deploy Next QueueJob: %s Status=%+v\n", qj.Name, qj.Status) + klog.Infof("[ScheduleNext] [Agent Mode] Deploy Next QueueJob: '%s/%s' Status=%+v", currentAW.Namespace, currentAW.Name, currentAW.Status) } dispatchFailedReason := "AppWrapperNotRunnable." dispatchFailedMessage := "" if qjm.isDispatcher { // Dispatcher Mode - agentId := qjm.chooseAgent(qj) + agentId := qjm.chooseAgent(¤tAW) if agentId != "" { // A proper agent is found. 
// Update states (CanRun=True) of XQJ in API Server // Add XQJ -> Agent Map - apiQueueJob, err := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) + //apiQueueJob.Status.CanRun = true + currentAW.Status.CanRun = true + queueJobKey, _ := GetQueueJobKey(¤tAW) + qjm.dispatchMap[queueJobKey] = agentId + klog.V(10).Infof("[TTime] %s, %s: ScheduleNextBeforeEtcd", currentAW.Name, time.Now().Sub(currentAW.CreationTimestamp.Time)) + updatedAW, err := qjm.updateEtcd(¤tAW, "ScheduleNext - setCanRun") if err != nil { - klog.Errorf("[ScheduleNext] Fail get AppWrapper job: %s from the api cache, err=%#v", qj.Name, err) + klog.Errorf("[ScheduleNext] failed to update in etcd the app wrapper '%s/%s', err=%#v", currentAW.Namespace, currentAW.Name, err) return } - // make sure qj has the latest information - if larger(apiQueueJob.ResourceVersion, qj.ResourceVersion) { - klog.V(10).Infof("[ScheduleNext] %s found more recent copy from cache &qj=%p qj=%+v", qj.Name, qj, qj) - klog.V(10).Infof("[ScheduleNext] %s found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", apiQueueJob.Name, apiQueueJob, apiQueueJob) - apiQueueJob.DeepCopyInto(qj) - } - - //apiQueueJob.Status.CanRun = true - qj.Status.CanRun = true - queueJobKey, _ := GetQueueJobKey(qj) - qjm.dispatchMap[queueJobKey] = agentId - klog.V(10).Infof("[TTime] %s, %s: ScheduleNextBeforeEtcd", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) - qjm.updateEtcd(qj, "ScheduleNext - setCanRun") - if err := qjm.eventQueue.Add(qj); err != nil { // unsuccessful add to eventQueue, add back to activeQ - klog.Errorf("[ScheduleNext] Fail to add %s to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) - qjm.qjqueue.MoveToActiveQueueIfExists(qj) + updatedAW.DeepCopyInto(¤tAW) + if err := qjm.eventQueue.Add(¤tAW); err != nil { // unsuccessful add to eventQueue, add back to activeQ + klog.Errorf("[ScheduleNext] Fail to add %s to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", + currentAW.Name, currentAW, currentAW.ResourceVersion, currentAW.Status, err) + qjm.qjqueue.MoveToActiveQueueIfExists(¤tAW) } else { // successful add to eventQueue, remove from qjqueue - if qjm.qjqueue.IfExist(qj) { - klog.V(10).Infof("[ScheduleNext] AppWrapper %s will be deleted from priority queue and sent to event queue", qj.Name) + if qjm.qjqueue.IfExist(¤tAW) { + klog.V(10).Infof("[ScheduleNext] AppWrapper %s will be deleted from priority queue and sent to event queue", currentAW.Name) } - qjm.qjqueue.Delete(qj) + qjm.qjqueue.Delete(¤tAW) } - - //if _, err := qjm.arbclients.ArbV1().AppWrappers(qj.Namespace).Update(apiQueueJob); err != nil { - // klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", qj.Namespace, qj.Name, err) - //} - klog.V(10).Infof("[TTime] %s, %s: ScheduleNextAfterEtcd", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[TTime] %s, %s: ScheduleNextAfterEtcd", currentAW.Name, time.Now().Sub(currentAW.CreationTimestamp.Time)) return } else { dispatchFailedMessage = "Cannot find an cluster with enough resources to dispatch AppWrapper." 
klog.V(2).Infof("[Controller: Dispatcher Mode] %s %s\n", dispatchFailedReason, dispatchFailedMessage) - go qjm.backoff(qj, dispatchFailedReason, dispatchFailedMessage) + go qjm.backoff(¤tAW, dispatchFailedReason, dispatchFailedMessage) } } else { // Agent Mode - aggqj := qjm.GetAggregatedResources(qj) + aggqj := qjm.GetAggregatedResources(¤tAW) // HeadOfLine logic HOLStartTime := time.Now() @@ -1246,7 +1233,7 @@ func (qjm *XController) ScheduleNext() { // Try to forward to eventQueue for at most HeadOfLineHoldingTime for !forwarded { klog.Infof("[ScheduleNext] Forwarding loop iteration: %d", fowardingLoopCount) - priorityindex := qj.Status.SystemPriority + priorityindex := currentAW.Status.SystemPriority // Support for Non-Preemption if !qjm.serverOption.Preemption { priorityindex = -math.MaxFloat64 @@ -1256,13 +1243,15 @@ func (qjm *XController) ScheduleNext() { priorityindex = -math.MaxFloat64 } resources, proposedPreemptions := qjm.getAggregatedAvailableResourcesPriority( - qjm.cache.GetUnallocatedResources(), priorityindex, qj, "") - klog.Infof("[ScheduleNext] XQJ %s with resources %v to be scheduled on aggregated idle resources %v", qj.Name, aggqj, resources) + qjm.cache.GetUnallocatedResources(), priorityindex, ¤tAW, "") + klog.Infof("[ScheduleNext] XQJ '%s/%s' with resources %v to be scheduled on aggregated idle resources %v", currentAW.Namespace, currentAW.Name, aggqj, resources) - if aggqj.LessEqual(resources) && qjm.nodeChecks(qjm.cache.GetUnallocatedHistograms(), qj) { + if aggqj.LessEqual(resources) && qjm.nodeChecks(qjm.cache.GetUnallocatedHistograms(), ¤tAW) { // Now evaluate quota fits := true - klog.V(4).Infof("[ScheduleNext] HOL available resourse successful check for %s at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(4).Infof("[ScheduleNext] HOL available resourse successful check for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", + currentAW.Namespace, currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW, + currentAW.ResourceVersion, currentAW.Status) if qjm.serverOption.QuotaEnabled { if qjm.quotaManager != nil { // Quota tree design: @@ -1273,15 +1262,9 @@ func (qjm *XController) ScheduleNext() { // - Depending on how the 'default' node is configured, AppWrappers that don't specify quota could be // preemptable by default (e.g., 'default' node with 'cpu: 0m' and 'memory: 0Mi' quota and 'hardLimit: false' // such node borrows quota from other nodes already in the system) - apiCacheAWJob, err := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) - if err != nil { - klog.Errorf("[ScheduleNext] Failed to get AppWrapper from API Cache %v/%v: %v", - qj.Namespace, qj.Name, err) - continue - } allTrees := qjm.quotaManager.GetValidQuotaLabels() newLabels := make(map[string]string) - for key, value := range apiCacheAWJob.Labels { + for key, value := range currentAW.Labels { newLabels[key] = value } updateLabels := false @@ -1292,19 +1275,21 @@ func (qjm *XController) ScheduleNext() { } } if updateLabels { - apiCacheAWJob.SetLabels(newLabels) - if err := qjm.updateEtcd(apiCacheAWJob, "ScheduleNext - setDefaultQuota"); err == nil { - klog.V(3).Infof("[ScheduleNext] Default quota added to AW %v", qj.Name) - } else { - klog.V(3).Infof("[ScheduleNext] Failed to added default 
quota to AW %v, skipping dispatch of AW", qj.Name) + currentAW.SetLabels(newLabels) + updatedAW, err := qjm.updateEtcd(¤tAW, "ScheduleNext - setDefaultQuota") + if err != nil { + klog.V(3).Infof("[ScheduleNext] Failed to added default quota to AW '%s/%s', skipping dispatch of AW", currentAW.Namespace, currentAW.Name) return } + klog.V(3).Infof("[ScheduleNext] Default quota added to AW '%s/%s'", currentAW.Namespace, currentAW.Name) + updatedAW.DeepCopyInto(¤tAW) } var msg string var preemptAWs []*arbv1.AppWrapper - quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(qj, aggqj, proposedPreemptions) + quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(¤tAW, aggqj, proposedPreemptions) if quotaFits { - klog.Infof("[ScheduleNext] HOL quota evaluation successful %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.Infof("[ScheduleNext] HOL quota evaluation successful '%s/%s' for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", currentAW.Namespace, + currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), ¤tAW, currentAW.ResourceVersion, currentAW.Status) // Set any jobs that are marked for preemption qjm.preemptAWJobs(preemptAWs) } else { // Not enough free quota to dispatch appwrapper @@ -1313,64 +1298,59 @@ func (qjm *XController) ScheduleNext() { dispatchFailedReason += " " dispatchFailedReason += msg } - klog.V(3).Infof("[ScheduleNext] HOL Blocking by %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s, due to quota limits", - qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, msg) + klog.V(3).Infof("[ScheduleNext] HOL Blocking by '%s/%s' for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s, due to quota limits", currentAW.Namespace, + currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), ¤tAW, + currentAW.ResourceVersion, currentAW.Status, msg) } fits = quotaFits } else { fits = false //Quota manager not initialized dispatchFailedMessage = "Quota evaluation is enabled but not initialized. Insufficient quota to dispatch AppWrapper." - klog.Errorf("[ScheduleNext] Quota evaluation is enabled but not initialized. AppWrapper %s/%s does not have enough quota\n", qj.Name, qj.Namespace) + klog.Errorf("[ScheduleNext] Quota evaluation is enabled but not initialized. AppWrapper %s/%s does not have enough quota", currentAW.Namespace, currentAW.Name) } } else { - klog.V(4).Infof("[ScheduleNext] HOL quota evaluation not enabled for %s at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(4).Infof("[ScheduleNext] HOL quota evaluation not enabled for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", currentAW.Namespace, + currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW, currentAW.ResourceVersion, currentAW.Status) } // If quota evalauation sucedeed or quota evaluation not enabled set the appwrapper to be dispatched if fits { - // aw is ready to go! 
- apiQueueJob, e := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) - // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level - if e != nil { - klog.Errorf("[ScheduleNext] Unable to get AW %s from API cache &aw=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) - if qjm.quotaManager != nil && quotaFits { - //Quota was allocated for this appwrapper, release it. - qjm.quotaManager.Release(qj) - } - return - } - // make sure qj has the latest information - if larger(apiQueueJob.ResourceVersion, qj.ResourceVersion) { - klog.V(4).Infof("[ScheduleNext] %s found more recent copy from cache &qj=%p qj=%+v", qj.Name, qj, qj) - klog.V(4).Infof("[ScheduleNext] %s found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", apiQueueJob.Name, apiQueueJob, apiQueueJob) - apiQueueJob.DeepCopyInto(qj) - } desired := int32(0) - for i, ar := range qj.Spec.AggrResources.Items { + for i, ar := range currentAW.Spec.AggrResources.Items { desired += ar.Replicas - qj.Spec.AggrResources.Items[i].AllocatedReplicas = ar.Replicas + currentAW.Spec.AggrResources.Items[i].AllocatedReplicas = ar.Replicas } - qj.Status.CanRun = true - qj.Status.FilterIgnore = true // update CanRun & Spec. no need to trigger event + currentAW.Status.CanRun = true + currentAW.Status.FilterIgnore = true // update CanRun & Spec. no need to trigger event // Handle k8s watch race condition - if err := qjm.updateEtcd(qj, "ScheduleNext - setCanRun"); err == nil { - // add to eventQueue for dispatching to Etcd - if err = qjm.eventQueue.Add(qj); err != nil { // unsuccessful add to eventQueue, add back to activeQ - klog.Errorf("[ScheduleNext] Fail to add %s to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) - qjm.qjqueue.MoveToActiveQueueIfExists(qj) - } else { // successful add to eventQueue, remove from qjqueue - qjm.qjqueue.Delete(qj) - forwarded = true - klog.V(4).Infof("[ScheduleNext] %s Delay=%.6f seconds eventQueue.Add_afterHeadOfLine activeQ=%t, Unsched=%t &aw=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - } - } //updateEtcd + updatedAW, err := qjm.updateEtcd(¤tAW, "ScheduleNext - setCanRun") + if err != nil { + //updateEtcd failed + klog.Errorf("[ScheduleNext] failed to update in etcd the app wrapper '%s/%s', err=%#v", currentAW.Namespace, currentAW.Name, err) + return + } + updatedAW.DeepCopyInto(¤tAW) + // add to eventQueue for dispatching to Etcd + if err = qjm.eventQueue.Add(¤tAW); err != nil { // unsuccessful add to eventQueue, add back to activeQ + klog.Errorf("[ScheduleNext] Fail to add '%s/%s' to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", currentAW.Namespace, + currentAW.Name, currentAW, currentAW.ResourceVersion, currentAW.Status, err) + qjm.qjqueue.MoveToActiveQueueIfExists(¤tAW) + } else { // successful add to eventQueue, remove from qjqueue + qjm.qjqueue.Delete(¤tAW) + forwarded = true + klog.V(4).Infof("[ScheduleNext] '%s/%s' Delay=%.6f seconds eventQueue.Add_afterHeadOfLine activeQ=%t, Unsched=%t &aw=%p Version=%s Status=%+v", + currentAW.Namespace, currentAW.Name, time.Now().Sub(currentAW.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.IfExistActiveQ(¤tAW), + qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW, currentAW.ResourceVersion, 
currentAW.Status) + } } //fits } else { // Not enough free resources to dispatch HOL dispatchFailedMessage = "Insufficient resources to dispatch AppWrapper." - klog.V(4).Infof("[ScheduleNext] HOL Blocking by %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(4).Infof("[ScheduleNext] HOL Blocking by '%s/%s' for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + currentAW.Namespace, currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), + qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW, currentAW.ResourceVersion, currentAW.Status) } - schedulingTimeExpired := time.Now().After(HOLStartTime.Add(time.Duration(qjm.serverOption.HeadOfLineHoldingTime) * time.Second)) + schedulingTimeExpired := (qjm.serverOption.HeadOfLineHoldingTime > 0) && time.Now().After(HOLStartTime.Add(time.Duration(qjm.serverOption.HeadOfLineHoldingTime)*time.Second)) if forwarded { break } else if schedulingTimeExpired { @@ -1378,69 +1358,71 @@ func (qjm *XController) ScheduleNext() { // release quota if allocated if qjm.quotaManager != nil && quotaFits { //Quota was allocated for this appwrapper, release it. - qjm.quotaManager.Release(qj) + qjm.quotaManager.Release(¤tAW) } break } else { // Try to dispatch again after one second if qjm.quotaManager != nil && quotaFits { //release any quota as the qj will be tried again and the quota might have been allocated. - qjm.quotaManager.Release(qj) + qjm.quotaManager.Release(¤tAW) } time.Sleep(time.Second * 1) } fowardingLoopCount += 1 } if !forwarded { // start thread to backoff - klog.V(3).Infof("[ScheduleNext] HOL backoff %s after waiting for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(3).Infof("[ScheduleNext] HOL backoff '%s/%s' after waiting for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", currentAW.Namespace, + currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), ¤tAW, currentAW.ResourceVersion, currentAW.Status) if qjm.quotaManager != nil && quotaFits { - qjm.quotaManager.Release(qj) + qjm.quotaManager.Release(¤tAW) } - go qjm.backoff(qj, dispatchFailedReason, dispatchFailedMessage) + go qjm.backoff(¤tAW, dispatchFailedReason, dispatchFailedMessage) } } } // Update AppWrappers in etcd // todo: This is a current workaround for duplicate message bug. 
-func (cc *XController) updateEtcd(qj *arbv1.AppWrapper, at string) error { - //apiCacheAWJob, e := cc.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) - // - //if (e != nil) { - // klog.Errorf("[updateEtcd] Failed to update status of AppWrapper %s, namespace: %s at %s err=%v", - // apiCacheAWJob.Name, apiCacheAWJob.Namespace, at, e) - // return e - //} - - //TODO: Remove next line - var apiCacheAWJob *arbv1.AppWrapper - //TODO: Remove next line - apiCacheAWJob = qj - apiCacheAWJob.Status.Sender = "before " + at // set Sender string to indicate code location - apiCacheAWJob.Status.Local = false // for Informer FilterFunc to pickup - if _, err := cc.arbclients.ArbV1().AppWrappers(apiCacheAWJob.Namespace).Update(apiCacheAWJob); err != nil { - klog.Errorf("[updateEtcd] Failed to update status of AppWrapper %s, namespace: %s at %s err=%v", - apiCacheAWJob.Name, apiCacheAWJob.Namespace, at, err) - return err - // } else { // qjj should be the same as qj except with newer ResourceVersion - // qj.ResourceVersion = qjj.ResourceVersion // update new ResourceVersion from etcd +func (cc *XController) updateEtcd(currentAppwrapper *arbv1.AppWrapper, caller string) (*arbv1.AppWrapper, error) { + klog.V(4).Infof("[updateEtcd] trying to update '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + currentAppwrapper.Status.Sender = "before " + caller // set Sender string to indicate code location + currentAppwrapper.Status.Local = false // for Informer FilterFunc to pickup + updatedAppwrapper, err := cc.arbclients.ArbV1().AppWrappers(currentAppwrapper.Namespace).Update(currentAppwrapper) + if err != nil { + if apiErrors.IsNotFound(err) { + klog.Warningf("[updateEtcd] app wrapper '%s/%s' not found when called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + } else if apiErrors.IsConflict(err) { + klog.Warningf("[updateEtcd] app wrapper '%s/%s' update version conflict called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + } + klog.Errorf("[updateEtcd] Failed to update status of AppWrapper '%s/%s' called by '%s', err=%v", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, err) + return nil, err } - klog.V(10).Infof("[updateEtcd] AppWrapperUpdate success %s at %s &qj=%p qj=%+v", - apiCacheAWJob.Name, at, apiCacheAWJob, apiCacheAWJob) - //qj.Status.Local = true // for Informer FilterFunc to ignore duplicate - //qj.Status.Sender = "after "+ at // set Sender string to indicate code location - return nil + if larger(currentAppwrapper.ResourceVersion, updatedAppwrapper.ResourceVersion) { + klog.Warningf("[updateEtcd] updated app wrapper '%s/%s' called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) + } + + klog.V(4).Infof("[updateEtcd] update success '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + return updatedAppwrapper.DeepCopy(), nil } -func (cc *XController) updateStatusInEtcd(qj *arbv1.AppWrapper, at string) error { - var apiCacheAWJob *arbv1.AppWrapper - apiCacheAWJob = qj - if _, err := cc.arbclients.ArbV1().AppWrappers(apiCacheAWJob.Namespace).UpdateStatus(apiCacheAWJob); err != nil { - klog.Errorf("[updateEtcd] Failed to update status of AppWrapper %s, namespace: %s at %s err=%v", - apiCacheAWJob.Name, apiCacheAWJob.Namespace, at, err) +func (cc *XController) updateStatusInEtcd(currentAppwrapper *arbv1.AppWrapper, caller string) error { + klog.V(4).Infof("[updateStatusInEtcd] trying to update 
'%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + updatedAppwrapper, err := cc.arbclients.ArbV1().AppWrappers(currentAppwrapper.Namespace).UpdateStatus(currentAppwrapper) + if err != nil { + if apiErrors.IsNotFound(err) { + klog.Warningf("[updateEtcd] app wrapper '%s/%s' not found when called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + panic(err) + } else if apiErrors.IsConflict(err) { + klog.Warningf("[updateEtcd] app wrapper '%s/%s' update version conflig called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + panic(err) + } + klog.Errorf("[updateStatusInEtcd] Failed to update status of '%s/%s' called by '%s', err=%v", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, err) return err } - klog.V(10).Infof("[updateEtcd] AppWrapperUpdate success %s at %s &qj=%p qj=%+v", - apiCacheAWJob.Name, at, apiCacheAWJob, apiCacheAWJob) + if larger(currentAppwrapper.ResourceVersion, updatedAppwrapper.ResourceVersion) { + klog.Warningf("[updateEtcd] updated app wrapper '%s/%s' not found when called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) + } + klog.V(10).Infof("[updateStatusInEtcd] update success '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) return nil } @@ -1555,8 +1537,9 @@ func (qjm *XController) backoff(q *arbv1.AppWrapper, reason string, message stri apiCacheAWJob.Status.QueueJobState = arbv1.AppWrapperCondBackoff workingAW.Status.FilterIgnore = true // update QueueJobState only, no work needed qjm.addOrUpdateCondition(workingAW, arbv1.AppWrapperCondBackoff, v1.ConditionTrue, reason, message) - //qjm.updateEtcd(workingAW, "backoff - Rejoining") - qjm.updateStatusInEtcd(workingAW, "backoff - Rejoining") + if err := qjm.updateStatusInEtcd(workingAW, "backoff - Rejoining"); err != nil { + klog.Errorf("[backoff] Failed to updated AW status in etcd '%s/%s'. Continuing with possible stale object without updating conditions.", workingAW.Namespace, workingAW.Name) + } } else { workingAW = q klog.Errorf("[backoff] Failed to retrieve cached object for %s/%s. Continuing with possible stale object without updating conditions.", workingAW.Namespace, workingAW.Name) @@ -1604,7 +1587,7 @@ func (cc *XController) Run(stopCh chan struct{}) { go wait.Until(cc.PreemptQueueJobs, 60*time.Second, stopCh) // This thread is used as a heartbeat to calculate runtime spec in the status - go wait.Until(cc.UpdateQueueJobs, 5*time.Second, stopCh) + //go wait.Until(cc.UpdateQueueJobs, 5*time.Second, stopCh) if cc.isDispatcher { go wait.Until(cc.UpdateAgent, 2*time.Second, stopCh) // In the Agent? @@ -1720,16 +1703,16 @@ func (cc *XController) updateQueueJob(oldObj, newObj interface{}) { } // AppWrappers may come out of order. Ignore old ones. 
if (oldQJ.Name == newQJ.Name) && (larger(oldQJ.ResourceVersion, newQJ.ResourceVersion)) { - klog.V(10).Infof("[Informer-updateQJ] %s ignored OutOfOrder arrival &oldQJ=%p oldQJ=%+v", oldQJ.Name, oldQJ, oldQJ) - klog.V(10).Infof("[Informer-updateQJ] %s ignored OutOfOrder arrival &newQJ=%p newQJ=%+v", newQJ.Name, newQJ, newQJ) + klog.V(4).Infof("[Informer-updateQJ] %s ignored OutOfOrder arrival &oldQJ=%p oldQJ=%+v", oldQJ.Name, oldQJ, oldQJ) + klog.V(4).Infof("[Informer-updateQJ] %s ignored OutOfOrder arrival &newQJ=%p newQJ=%+v", newQJ.Name, newQJ, newQJ) return } if equality.Semantic.DeepEqual(newQJ.Status, oldQJ.Status) { - klog.V(10).Infof("[Informer-updateQJ] No change to status field of AppWrapper: %s, oldAW=%+v, newAW=%+v.", newQJ.Name, oldQJ.Status, newQJ.Status) + klog.V(4).Infof("[Informer-updateQJ] No change to status field of AppWrapper: %s, oldAW=%+v, newAW=%+v.", newQJ.Name, oldQJ.Status, newQJ.Status) } - klog.V(3).Infof("[Informer-updateQJ] %s *Delay=%.6f seconds normal enqueue &newQJ=%p Version=%s Status=%+v", newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ, newQJ.ResourceVersion, newQJ.Status) + klog.V(4).Infof("[Informer-updateQJ] '%s/%s' *Delay=%.6f seconds normal enqueue Version=%s Status=%+v", newQJ.Namespace, newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ.ResourceVersion, newQJ.Status) cc.enqueue(newQJ) } @@ -1847,23 +1830,23 @@ func (cc *XController) updateQueueJobStatus(queueJobFromAgent *arbv1.AppWrapper) } func (cc *XController) worker() { - defer func() { - if pErr := recover(); pErr != nil { - klog.Errorf("[worker] Panic occurred error: %v, stacktrace: %s", pErr, string(debug.Stack())) - } - }() + // defer func() { + // if pErr := recover(); pErr != nil { + // klog.Errorf("[worker] Panic occurred error: %v, stacktrace: %s", pErr, string(debug.Stack())) + // } + // }() if _, err := cc.eventQueue.Pop(func(obj interface{}) error { - var queuejob *arbv1.AppWrapper + var appWrapper *arbv1.AppWrapper switch v := obj.(type) { case *arbv1.AppWrapper: - queuejob = v + appWrapper = v default: klog.Errorf("[worker] eventQueue.Pop un-supported type. obj=%+v", obj) return nil } - klog.V(10).Infof("[worker] %s *Delay=%.6f seconds eventQueue.Pop_begin &newQJ=%p Version=%s Status=%+v", queuejob.Name, time.Now().Sub(queuejob.Status.ControllerFirstTimestamp.Time).Seconds(), queuejob, queuejob.ResourceVersion, queuejob.Status) + klog.V(10).Infof("[worker] %s *Delay=%.6f seconds eventQueue.Pop_begin &newQJ=%p Version=%s Status=%+v", appWrapper.Name, time.Now().Sub(appWrapper.Status.ControllerFirstTimestamp.Time).Seconds(), appWrapper, appWrapper.ResourceVersion, appWrapper.Status) - if queuejob == nil { + if appWrapper == nil { if acc, err := meta.Accessor(obj); err != nil { klog.Warningf("[worker] Failed to get AppWrapper for %v/%v", acc.GetNamespace(), acc.GetName()) } @@ -1871,14 +1854,13 @@ func (cc *XController) worker() { return nil } - // sync AppWrapper - if err := cc.syncQueueJob(queuejob); err != nil { - klog.Errorf("[worker] Failed to sync AppWrapper %s, err %#v", queuejob.Name, err) + if err := cc.syncQueueJob(appWrapper); err != nil { + klog.Errorf("[worker] Failed to sync AppWrapper %s, err %#v", appWrapper.Name, err) // If any error, requeue it. 
return err } - klog.V(10).Infof("[worker] Ending %s Delay=%.6f seconds &newQJ=%p Version=%s Status=%+v", queuejob.Name, time.Now().Sub(queuejob.Status.ControllerFirstTimestamp.Time).Seconds(), queuejob, queuejob.ResourceVersion, queuejob.Status) + klog.V(10).Infof("[worker] Ending %s Delay=%.6f seconds &newQJ=%p Version=%s Status=%+v", appWrapper.Name, time.Now().Sub(appWrapper.Status.ControllerFirstTimestamp.Time).Seconds(), appWrapper, appWrapper.ResourceVersion, appWrapper.Status) return nil }); err != nil { klog.Errorf("[worker] Fail to pop item from eventQueue, err %#v", err) @@ -1886,39 +1868,33 @@ func (cc *XController) worker() { } } -func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { - cacheAWJob, err := cc.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) - if err != nil { - klog.V(10).Infof("[syncQueueJob] AppWrapper %s not found in cache: info=%+v", qj.Name, err) - // Implicit detection of deletion - if apierrors.IsNotFound(err) { - //if (cc.isDispatcher) { - cc.Cleanup(qj) - cc.qjqueue.Delete(qj) - //} +func (cc *XController) syncQueueJob(queueAW *arbv1.AppWrapper) error { + // get the newest version of the appwrapper if available + var currentAW arbv1.AppWrapper + if err := cc.getAppWrapper(queueAW.Namespace, queueAW.Name, ¤tAW); err != nil { + if apiErrors.IsNotFound(err) { + klog.Errorf("[syncQueueJob] Deleting AppWrapper '%s/%s' since it was not found in cache", queueAW.Namespace, queueAW.Name) + cc.Cleanup(queueAW) + cc.qjqueue.Delete(queueAW) return nil } + klog.Errorf("[syncQueueJob] Failed to get fresh copy of appwrapper AppWrapper '%s/%s', err %v", currentAW.Namespace, currentAW.Name, err) return err } - klog.V(10).Infof("[syncQueueJob] Cache AW %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) - // make sure qj has the latest information - if larger(qj.ResourceVersion, qj.ResourceVersion) { - klog.V(10).Infof("[syncQueueJob] %s found more recent copy from cache &qj=%p qj=%+v", qj.Name, qj, qj) - klog.V(10).Infof("[syncQueueJobJ] %s found more recent copy from cache &cacheAWJob=%p cacheAWJob=%+v", cacheAWJob.Name, cacheAWJob, cacheAWJob) - cacheAWJob.DeepCopyInto(qj) - } + klog.V(4).Infof("[syncQueueJob] Cache AW '%s/%s' Version=%s Status=%+v", currentAW.Namespace, currentAW.Name, currentAW.ResourceVersion, currentAW.Status) // If it is Agent (not a dispatcher), update pod information podPhaseChanges := false if !cc.isDispatcher { //Make a copy first to not update cache object and to use for comparing - awNew := qj.DeepCopy() + awNew := currentAW.DeepCopy() // we call sync to update pods running, pending,... 
- if qj.Status.State == arbv1.AppWrapperStateActive { + if currentAW.Status.State == arbv1.AppWrapperStateActive { err := cc.qjobResControls[arbv1.ResourceTypePod].UpdateQueueJobStatus(awNew) if err != nil { - klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: %s, err=%+v", qj.Name, err) + klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: %s, err=%+v", currentAW.Name, err) + return err } klog.V(10).Infof("[syncQueueJob] AW popped from event queue %s &qj=%p Version=%s Status=%+v", awNew.Name, awNew, awNew.ResourceVersion, awNew.Status) @@ -1928,24 +1904,28 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunning, v1.ConditionTrue, "PodsRunning", "") awNew.Status.Conditions = append(awNew.Status.Conditions, cond) awNew.Status.FilterIgnore = true // Update AppWrapperCondRunning - cc.updateEtcd(awNew, "[syncQueueJob] setRunning") + updateAW, err := cc.updateEtcd(awNew, "[syncQueueJob] setRunning") + if err != nil { + klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: %s, err=%+v", currentAW.Name, err) + } + //save the updated AW state for later use + updateAW.DeepCopyInto(¤tAW) } //For debugging? - if !reflect.DeepEqual(awNew.Status, qj.Status) { + if !reflect.DeepEqual(awNew.Status, currentAW.Status) { podPhaseChanges = true // Using DeepCopy before DeepCopyInto as it seems that DeepCopyInto does not alloc a new memory object - awNewStatus := awNew.Status.DeepCopy() - awNewStatus.DeepCopyInto(&qj.Status) + // awNewStatus := awNew.Status.DeepCopy() + // awNewStatus.DeepCopyInto(¤tAW.Status) //awNew.Status.DeepCopy().DeepCopyInto(&qj.Status) klog.V(10).Infof("[syncQueueJob] AW pod phase change(s) detected %s &eventqueueaw=%p eventqueueawVersion=%s eventqueueawStatus=%+v; &newaw=%p newawVersion=%s newawStatus=%+v", - qj.Name, qj, qj.ResourceVersion, qj.Status, awNew, awNew.ResourceVersion, awNew.Status) + currentAW.Name, currentAW, currentAW.ResourceVersion, currentAW.Status, awNew, awNew.ResourceVersion, awNew.Status) } } } - return cc.manageQueueJob(qj, podPhaseChanges) - //return cc.manageQueueJob(cacheAWJob) + return cc.manageQueueJob(¤tAW, podPhaseChanges) } // manageQueueJob is the core method responsible for managing the number of running @@ -1953,11 +1933,14 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { // Does NOT modify . func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool) error { var err error - startTime := time.Now() - defer func() { - klog.V(10).Infof("[worker-manageQJ] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status) - }() - + // startTime := time.Now() + // defer func() { + // klog.V(10).Infof("[worker-manageQJ] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status) + // }() + if qj == nil { + klog.Error("[worker-manageQJ] A nill appwrapper was passed in") + return fmt.Errorf("a nill aws is not allowed in call to manageQueueJob") + } if !cc.isDispatcher { // Agent Mode if qj.DeletionTimestamp != nil { @@ -1986,8 +1969,9 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool if podPhaseChanges { // Only update etcd if AW status has changed. This can happen for periodic // updates of pod phase counts done in caller of this function. 
- if err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { + if _, err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) + // return err } } return nil @@ -2027,7 +2011,12 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool } qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext - cc.updateEtcd(qj, "manageQueueJob - setQueueing") + updatedAW, err := cc.updateEtcd(qj, "manageQueueJob - setQueueing") + if err != nil { + klog.Errorf("[manageQueueJob] Failed to updated etcd for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err) + return err + } + updatedAW.DeepCopyInto(qj) klog.V(10).Infof("[worker-manageQJ] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) if err = cc.qjqueue.AddIfNotPresent(qj); err != nil { klog.Errorf("[worker-manageQJ] Fail to add %s to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", @@ -2126,10 +2115,12 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool // TODO(k82cn): replaced it with `UpdateStatus` qj.Status.FilterIgnore = true // update State & QueueJobState after dispatch - if err := cc.updateEtcd(qj, "manageQueueJob - afterEtcdDispatching"); err != nil { + updatedAW, err := cc.updateEtcd(qj, "manageQueueJob - afterEtcdDispatching") + if err != nil { klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) return err } + updatedAW.DeepCopyInto(qj) } else if qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateActive { //set appwrapper status to Complete or RunningHoldCompletion @@ -2152,26 +2143,33 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Status.Conditions[index] = *cond.DeepCopy() updateQj = qj.DeepCopy() } - cc.updateEtcd(updateQj, "[syncQueueJob] setRunningHoldCompletion") + if _, err := cc.updateEtcd(updateQj, "[syncQueueJob] setRunningHoldCompletion"); err != nil { + klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) + return err + } } //Set appwrapper status to complete if derivedAwStatus == arbv1.AppWrapperStateCompleted { qj.Status.State = derivedAwStatus qj.Status.CanRun = false - var updateQj *arbv1.AppWrapper + updateQj := qj.DeepCopy() index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondCompleted, "PodsCompleted") if index < 0 { qj.Status.QueueJobState = arbv1.AppWrapperCondCompleted cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") qj.Status.Conditions = append(qj.Status.Conditions, cond) qj.Status.FilterIgnore = true // Update AppWrapperCondCompleted - updateQj = qj.DeepCopy() } else { cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") qj.Status.Conditions[index] = *cond.DeepCopy() - updateQj = qj.DeepCopy() } - cc.updateEtcd(updateQj, "[syncQueueJob] setCompleted") + updatedAW, err := cc.updateEtcd(updateQj, "[syncQueueJob] setCompleted") + if err != nil { + cc.quotaManager.Release(updateQj) + klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) + return err + } + 
updatedAW.DeepCopyInto(qj) cc.quotaManager.Release(updateQj) } @@ -2179,7 +2177,7 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool } else if podPhaseChanges { // Continued bug fix // Only update etcd if AW status has changed. This can happen for periodic // updates of pod phase counts done in caller of this function. - if err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { + if _, err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) } } @@ -2330,3 +2328,31 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { return nil } +func (cc *XController) refreshAppWrapper(appwrapper *arbv1.AppWrapper) error { + klog.V(4).Infof("[refreshAppWrapper] geting fresh copy of '%s/%s'", appwrapper.Namespace, appwrapper.Name) + apiCacheAWJob, err := cc.queueJobLister.AppWrappers(appwrapper.Namespace).Get(appwrapper.Name) + // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level + if err != nil { + return err + } + // make sure qj has the latest information + if larger(apiCacheAWJob.ResourceVersion, appwrapper.ResourceVersion) { + klog.V(4).Infof("[refreshAppWrapper] '%s/%s' found more recent copy from cache with status=%+v", appwrapper.Namespace, appwrapper.Name, appwrapper.Status) + klog.V(4).Infof("[refreshAppWrapper] '%s/%s' found more recent copy from cache with status=%+v", apiCacheAWJob.Namespace, apiCacheAWJob.Name, apiCacheAWJob.Status) + apiCacheAWJob.DeepCopyInto(appwrapper) + } + klog.V(4).Infof("[refreshAppWrapper] refresh of '%s/%s' succeeded", appwrapper.Namespace, appwrapper.Name) + return nil +} +func (cc *XController) getAppWrapper(namespace string, name string, targetAppwrapper *arbv1.AppWrapper) error { + klog.V(4).Infof("[getAppWrapper] geting a copy of '%s/%s'", namespace, name) + apiCacheAWJob, err := cc.queueJobLister.AppWrappers(namespace).Get(name) + // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level + if err != nil { + klog.V(4).Infof("[getAppWrapper] geting a copy of '%s/%s' failed", namespace, name) + return err + } + apiCacheAWJob.DeepCopyInto(targetAppwrapper) + klog.V(4).Infof("[getAppWrapper] geting a copy of '%s/%s' suceeded", namespace, name) + return nil +} diff --git a/test/yaml/0002-aw-job-quota.yaml b/test/yaml/0002-aw-job-quota.yaml new file mode 100644 index 000000000..36cfbb31c --- /dev/null +++ b/test/yaml/0002-aw-job-quota.yaml @@ -0,0 +1,63 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: my-job-1 + namespace: test + labels: + quota_context: "bronze" + quota_service: "gold" +spec: + schedulingSpec: + minAvailable: 1 + resources: + GenericItems: + - replicas: 1 + completionstatus: Complete + custompodresources: + - replicas: 1 + requests: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + generictemplate: + apiVersion: batch/v1 + kind: Job + metadata: + name: my-job-1 + namespace: test + labels: + appwrapper.mcad.ibm.com: my-job-1 + spec: + parallelism: 1 + completions: 1 + template: + metadata: + name: my-job-1 + namespace: test + labels: + appwrapper.mcad.ibm.com: my-job-1 + spec: + terminationGracePeriodSeconds: 1 + restartPolicy: Never + containers: + - name: ubuntu + image: ubuntu:latest + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + sleep 30 + resources: + requests: + cpu: 
900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi From 80b0b490a3fa032a1a4fdfc7edc69ce6e6e08b1b Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Fri, 23 Jun 2023 00:42:25 +0300 Subject: [PATCH 02/23] Added retry logic for etc errors. --- .../crd/bases/mcad.ibm.com_appwrappers.yaml | 18 +- .../crds/mcad.ibm.com_appwrappers.yaml | 18 +- .../queuejob/queuejob_controller_ex.go | 899 +++++++++--------- test/yaml/0003-aw-job-no-quota.yaml | 60 ++ test/yaml/0004-aw-large-job-no-quota.yaml | 60 ++ test/yaml/0005-aw-two-quota-jobs.yaml | 127 +++ 6 files changed, 736 insertions(+), 446 deletions(-) create mode 100644 test/yaml/0003-aw-job-no-quota.yaml create mode 100644 test/yaml/0004-aw-large-job-no-quota.yaml create mode 100644 test/yaml/0005-aw-two-quota-jobs.yaml diff --git a/config/crd/bases/mcad.ibm.com_appwrappers.yaml b/config/crd/bases/mcad.ibm.com_appwrappers.yaml index b43d6d3cf..040655b35 100644 --- a/config/crd/bases/mcad.ibm.com_appwrappers.yaml +++ b/config/crd/bases/mcad.ibm.com_appwrappers.yaml @@ -1,5 +1,3 @@ - ---- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: @@ -778,6 +776,10 @@ spec: QueueJob (by Informer) format: date-time type: string + controllerfirstdispatchtimestamp: + description: Microsecond level timestamp when controller first dispatches appwrapper + format: date-time + type: string failed: description: The number of resources which reached phase Failed. format: int32 @@ -790,8 +792,7 @@ spec: description: Is Dispatched? type: boolean local: - description: Indicate if message is a duplicate (for Informer to recognize - duplicate messages) + description: Indicate if message is a duplicate (for Informer to recognize duplicate messages) type: boolean message: type: string @@ -800,15 +801,13 @@ spec: format: int32 type: integer queuejobstate: - description: State of QueueJob - Init, Queueing, HeadOfLine, Rejoining, - ... + description: State of QueueJob - Init, Queueing, HeadOfLine, Rejoining ... type: string running: format: int32 type: integer sender: - description: Indicate sender of this message (extremely useful for - debugging) + description: Indicate sender of this message (extremely useful for debugging) type: string state: description: State - Pending, Running, Failed, Deleted @@ -828,3 +827,6 @@ spec: type: object served: true storage: true + subresources: + status: {} + diff --git a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml index b43d6d3cf..040655b35 100644 --- a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml +++ b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml @@ -1,5 +1,3 @@ - ---- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: @@ -778,6 +776,10 @@ spec: QueueJob (by Informer) format: date-time type: string + controllerfirstdispatchtimestamp: + description: Microsecond level timestamp when controller first dispatches appwrapper + format: date-time + type: string failed: description: The number of resources which reached phase Failed. format: int32 @@ -790,8 +792,7 @@ spec: description: Is Dispatched? 
type: boolean local: - description: Indicate if message is a duplicate (for Informer to recognize - duplicate messages) + description: Indicate if message is a duplicate (for Informer to recognize duplicate messages) type: boolean message: type: string @@ -800,15 +801,13 @@ spec: format: int32 type: integer queuejobstate: - description: State of QueueJob - Init, Queueing, HeadOfLine, Rejoining, - ... + description: State of QueueJob - Init, Queueing, HeadOfLine, Rejoining ... type: string running: format: int32 type: integer sender: - description: Indicate sender of this message (extremely useful for - debugging) + description: Indicate sender of this message (extremely useful for debugging) type: string state: description: State - Pending, Running, Failed, Deleted @@ -828,3 +827,6 @@ spec: type: object served: true storage: true + subresources: + status: {} + diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 93652f0c8..3d6cf5919 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -35,6 +35,7 @@ import ( "math" "math/rand" "reflect" + "runtime/debug" // "runtime/debug" "sort" @@ -42,6 +43,7 @@ import ( "strings" "time" + "github.com/eapache/go-resiliency/retrier" qmutils "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/quotaplugins/util" "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/quota/quotaforestmanager" @@ -459,7 +461,7 @@ func (qjm *XController) PreemptQueueJobs() { newjob.Status.QueueJobState = arbv1.AppWrapperCondFailed newjob.Status.Running = 0 updateNewJob = newjob.DeepCopy() - if _, err := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { + if err := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", aw.Namespace, aw.Name, err) } //cannot use cleanup AW, since it puts AW back in running state @@ -517,7 +519,7 @@ func (qjm *XController) PreemptQueueJobs() { updateNewJob = newjob.DeepCopy() } - if _, err := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { + if err := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", aw.Namespace, aw.Name, err) } if cleanAppWrapper { @@ -546,7 +548,7 @@ func (qjm *XController) preemptAWJobs(preemptAWs []*arbv1.AppWrapper) { continue } apiCacheAWJob.Status.CanRun = false - if _, err := qjm.updateEtcd(apiCacheAWJob, "preemptAWJobs - CanRun: false"); err != nil { + if err := qjm.updateStatusInEtcd(apiCacheAWJob, "preemptAWJobs - CanRun: false"); err != nil { klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", apiCacheAWJob.Namespace, apiCacheAWJob.Name, err) } @@ -659,7 +661,7 @@ func GetPodTemplate(qjobRes *arbv1.AppWrapperResource) (*v1.PodTemplateSpec, err template, ok := obj.(*v1.PodTemplate) if !ok { - return nil, fmt.Errorf("Resource template not define as a PodTemplate") + return nil, fmt.Errorf("resource template not define as a PodTemplate") } return &template.Template, nil @@ -806,7 +808,7 @@ func (qjm *XController) getProposedPreemptions(requestingJob *arbv1.AppWrapper, } } - if foundEnoughResources == false { + if !foundEnoughResources { klog.V(10).Infof("[getProposedPreemptions] Not enought preemptable jobs to dispatch %s.", requestingJob.Name) } @@ -860,7 +862,7 @@ func (qjm *XController) 
getDispatchedAppWrappers() (map[string]*clusterstateapi. for _, aw := range appwrappers { // Get dispatched jobs - if aw.Status.CanRun == true { + if aw.Status.CanRun { id := qmutils.CreateId(aw.Namespace, aw.Name) awrRetVal[id] = qjm.GetAggregatedResources(aw) awsRetVal[id] = aw @@ -997,8 +999,6 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust } } continue - } else { - //Do nothing } } @@ -1103,281 +1103,324 @@ func (qjm *XController) ScheduleNext() { // check if we have enough compute resources for it // if we have enough compute resources then we set the AllocatedReplicas to the total // amount of resources asked by the job - queueAW, err := qjm.qjqueue.Pop() + qj, err := qjm.qjqueue.Pop() if err != nil { klog.Errorf("[ScheduleNext] Cannot pop QueueJob from qjqueue! err=%#v", err) return // Try to pop qjqueue again - } else { - klog.Infof("[ScheduleNext] activeQ.Pop %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", queueAW.Name, time.Now().Sub(queueAW.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), queueAW, queueAW.ResourceVersion, queueAW.Status) - } - - // Re-compute SystemPriority for DynamicPriority policy - if qjm.serverOption.DynamicPriority { - klog.V(4).Info("[ScheduleNext] dynamic priority enabled") - // Create newHeap to temporarily store qjqueue jobs for updating SystemPriority - tempQ := newHeap(cache.MetaNamespaceKeyFunc, HigherSystemPriorityQJ) - queueAW.Status.SystemPriority = float64(queueAW.Spec.Priority) + queueAW.Spec.PrioritySlope*(time.Now().Sub(queueAW.Status.ControllerFirstTimestamp.Time)).Seconds() - tempQ.Add(queueAW) - for qjm.qjqueue.Length() > 0 { - qjtemp, _ := qjm.qjqueue.Pop() - qjtemp.Status.SystemPriority = float64(qjtemp.Spec.Priority) + qjtemp.Spec.PrioritySlope*(time.Now().Sub(qjtemp.Status.ControllerFirstTimestamp.Time)).Seconds() - tempQ.Add(qjtemp) + } + qjm.schedulingAW.AtomicSet(qj) + // ensure that current active appwrapper is reset at the end of this function, to prevent + // the appwrapper from being added in synch job + defer qjm.schedulingAW.AtomicSet(nil) + + scheduleNextRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) + scheduleNextRetrier.SetJitter(0.05) + //Retry the execution + err = scheduleNextRetrier.Run(func() error { + klog.Infof("[ScheduleNext] activeQ.Pop %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), qj, + qj.ResourceVersion, qj.Status) + + apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] -- get fresh copy after queue pop") + // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level + if retryErr != nil { + klog.Errorf("[ScheduleNext] Unable to get AW %s from API cache &aw=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, retryErr) + return retryErr } - // move AppWrappers back to activeQ and sort based on SystemPriority - for tempQ.data.Len() > 0 { - qjtemp, _ := tempQ.Pop() - qjm.qjqueue.AddIfNotPresent(qjtemp.(*arbv1.AppWrapper)) + // make sure qj has the latest information + if larger(apiCacheAWJob.ResourceVersion, qj.ResourceVersion) { + klog.V(10).Infof("[ScheduleNext] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) + klog.V(10).Infof("[ScheduleNext] '%s/%s' found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", 
apiCacheAWJob.Namespace, apiCacheAWJob.Name, apiCacheAWJob, apiCacheAWJob) + apiCacheAWJob.DeepCopyInto(qj) } - // Print qjqueue.ativeQ for debugging - if klog.V(4).Enabled() { - pq := qjm.qjqueue.(*PriorityQueue) - if qjm.qjqueue.Length() > 0 { - for key, element := range pq.activeQ.data.items { - qjtemp := element.obj.(*arbv1.AppWrapper) - klog.V(4).Infof("[ScheduleNext] AfterCalc: qjqLength=%d Key=%s index=%d Priority=%.1f SystemPriority=%.1f QueueJobState=%s", - qjm.qjqueue.Length(), key, element.index, float64(qjtemp.Spec.Priority), qjtemp.Status.SystemPriority, qjtemp.Status.QueueJobState) + if qj.Status.CanRun { + klog.V(4).Infof("[ScheduleNext] AppWrapper '%s/%s' from prioirty queue is already scheduled. Ignoring request: Status=%v", qj.Namespace, qj.Name, qj.Status) + return nil + } + + // Re-compute SystemPriority for DynamicPriority policy + if qjm.serverOption.DynamicPriority { + klog.V(4).Info("[ScheduleNext] dynamic priority enabled") + // Create newHeap to temporarily store qjqueue jobs for updating SystemPriority + tempQ := newHeap(cache.MetaNamespaceKeyFunc, HigherSystemPriorityQJ) + qj.Status.SystemPriority = float64(qj.Spec.Priority) + qj.Spec.PrioritySlope*(time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time)).Seconds() + tempQ.Add(qj) + for qjm.qjqueue.Length() > 0 { + qjtemp, _ := qjm.qjqueue.Pop() + qjtemp.Status.SystemPriority = float64(qjtemp.Spec.Priority) + qjtemp.Spec.PrioritySlope*(time.Now().Sub(qjtemp.Status.ControllerFirstTimestamp.Time)).Seconds() + tempQ.Add(qjtemp) + } + // move AppWrappers back to activeQ and sort based on SystemPriority + for tempQ.data.Len() > 0 { + qjtemp, _ := tempQ.Pop() + qjm.qjqueue.AddIfNotPresent(qjtemp.(*arbv1.AppWrapper)) + } + // Print qjqueue.ativeQ for debugging + if klog.V(4).Enabled() { + pq := qjm.qjqueue.(*PriorityQueue) + if qjm.qjqueue.Length() > 0 { + for key, element := range pq.activeQ.data.items { + qjtemp := element.obj.(*arbv1.AppWrapper) + klog.V(4).Infof("[ScheduleNext] AfterCalc: qjqLength=%d Key=%s index=%d Priority=%.1f SystemPriority=%.1f QueueJobState=%s", + qjm.qjqueue.Length(), key, element.index, float64(qjtemp.Spec.Priority), qjtemp.Status.SystemPriority, qjtemp.Status.QueueJobState) + } } } - } - // Retrieve HeadOfLine after priority update - queueAW, err = qjm.qjqueue.Pop() - if err != nil { - klog.V(3).Infof("[ScheduleNext] Cannot pop QueueJob from qjqueue! err=%#v", err) - } else { - klog.V(3).Infof("[ScheduleNext] activeQ.Pop_afterPriorityUpdate %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", queueAW.Name, - time.Now().Sub(queueAW.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), queueAW, queueAW.ResourceVersion, queueAW.Status) + // Retrieve HeadOfLine after priority update + qj, retryErr = qjm.qjqueue.Pop() + if retryErr != nil { + klog.V(3).Infof("[ScheduleNext] Cannot pop QueueJob from qjqueue! 
err=%#v", retryErr) + return err + } + klog.V(3).Infof("[ScheduleNext] activeQ.Pop_afterPriorityUpdate %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), qj, qj.ResourceVersion, qj.Status) + apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] -- after dynamic priority pop") + if retryErr != nil { + klog.Errorf("[ScheduleNext] failed to get a fresh copy of the app wrapper '%s/%s', err=%#v", qj.Namespace, qj.Name, retryErr) + return err + } + if apiCacheAWJob.Status.CanRun { + klog.Infof("[ScheduleNext] AppWrapper job: %s from API is already scheduled. Ignoring request: Status=%+v\n", qj.Name, qj.Status) + return nil + } + apiCacheAWJob.DeepCopyInto(qj) + qjm.schedulingAW.AtomicSet(qj) } - } - - if queueAW.Status.CanRun { - klog.V(4).Infof("[ScheduleNext] AppWrapper job: %s from priority queue is already scheduled. Ignoring request: Status=%+v\n", queueAW.Name, queueAW.Status) - return - } - var currentAW arbv1.AppWrapper - err = qjm.getAppWrapper(queueAW.Namespace, queueAW.Name, ¤tAW) - if err != nil { - klog.Errorf("[ScheduleNext] failed to get a fresh copy of the app wrapper '%s/%s', err=%#v", queueAW.Namespace, queueAW.Name, err) - return - } - qjm.schedulingAW.AtomicSet(¤tAW) - - if currentAW.Status.CanRun { - klog.V(4).Infof("[ScheduleNext] AppWrapper job: %s from API is already scheduled. Ignoring request: Status=%+v\n", queueAW.Name, queueAW.Status) - return - } - currentAW.Status.QueueJobState = arbv1.AppWrapperCondHeadOfLine - qjm.addOrUpdateCondition(¤tAW, arbv1.AppWrapperCondHeadOfLine, v1.ConditionTrue, "FrontOfQueue.", "") + qj.Status.QueueJobState = arbv1.AppWrapperCondHeadOfLine + qjm.addOrUpdateCondition(qj, arbv1.AppWrapperCondHeadOfLine, v1.ConditionTrue, "FrontOfQueue.", "") - currentAW.Status.FilterIgnore = true // update QueueJobState only - updatedAW, err := qjm.updateEtcd(¤tAW, "ScheduleNext - setHOL") - if err != nil { - klog.Errorf("[ScheduleNext] failed to update in etcd the app wrapper '%s/%s', err=%#v", currentAW.Namespace, currentAW.Name, err) - return - } - updatedAW.DeepCopyInto(¤tAW) - - qjm.qjqueue.AddUnschedulableIfNotPresent(¤tAW) // working on qj, avoid other threads putting it back to activeQ + qj.Status.FilterIgnore = true // update QueueJobState only + err = qjm.updateStatusInEtcd(qj, "ScheduleNext - setHOL") + if err != nil { + return err + } + qjm.qjqueue.AddUnschedulableIfNotPresent(qj) // working on qj, avoid other threads putting it back to activeQ - klog.V(4).Infof("[ScheduleNext] after Pop qjqLength=%d qj %s Version=%s activeQ=%t Unsched=%t Status=%+v", qjm.qjqueue.Length(), currentAW.Name, currentAW.ResourceVersion, qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW.Status) - if qjm.isDispatcher { - klog.Infof("[ScheduleNext] [Dispatcher Mode] Dispatch Next QueueJob: '%s/%s Status=%+v", currentAW.Namespace, currentAW.Name, currentAW.Status) - } else { - klog.Infof("[ScheduleNext] [Agent Mode] Deploy Next QueueJob: '%s/%s' Status=%+v", currentAW.Namespace, currentAW.Name, currentAW.Status) - } - - dispatchFailedReason := "AppWrapperNotRunnable." - dispatchFailedMessage := "" - if qjm.isDispatcher { // Dispatcher Mode - agentId := qjm.chooseAgent(¤tAW) - if agentId != "" { // A proper agent is found. 
- // Update states (CanRun=True) of XQJ in API Server - // Add XQJ -> Agent Map - //apiQueueJob.Status.CanRun = true - currentAW.Status.CanRun = true - queueJobKey, _ := GetQueueJobKey(¤tAW) - qjm.dispatchMap[queueJobKey] = agentId - klog.V(10).Infof("[TTime] %s, %s: ScheduleNextBeforeEtcd", currentAW.Name, time.Now().Sub(currentAW.CreationTimestamp.Time)) - updatedAW, err := qjm.updateEtcd(¤tAW, "ScheduleNext - setCanRun") - if err != nil { - klog.Errorf("[ScheduleNext] failed to update in etcd the app wrapper '%s/%s', err=%#v", currentAW.Namespace, currentAW.Name, err) - return - } - updatedAW.DeepCopyInto(¤tAW) - if err := qjm.eventQueue.Add(¤tAW); err != nil { // unsuccessful add to eventQueue, add back to activeQ - klog.Errorf("[ScheduleNext] Fail to add %s to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", - currentAW.Name, currentAW, currentAW.ResourceVersion, currentAW.Status, err) - qjm.qjqueue.MoveToActiveQueueIfExists(¤tAW) - } else { // successful add to eventQueue, remove from qjqueue - if qjm.qjqueue.IfExist(¤tAW) { - klog.V(10).Infof("[ScheduleNext] AppWrapper %s will be deleted from priority queue and sent to event queue", currentAW.Name) - } - qjm.qjqueue.Delete(¤tAW) - } - klog.V(10).Infof("[TTime] %s, %s: ScheduleNextAfterEtcd", currentAW.Name, time.Now().Sub(currentAW.CreationTimestamp.Time)) - return + klog.V(4).Infof("[ScheduleNext] after Pop qjqLength=%d qj %s Version=%s activeQ=%t Unsched=%t Status=%+v", qjm.qjqueue.Length(), qj.Name, qj.ResourceVersion, qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj.Status) + if qjm.isDispatcher { + klog.Infof("[ScheduleNext] [Dispatcher Mode] Dispatch next appwrapper: '%s/%s Status=%v", qj.Namespace, qj.Name, qj.Status) } else { - dispatchFailedMessage = "Cannot find an cluster with enough resources to dispatch AppWrapper." - klog.V(2).Infof("[Controller: Dispatcher Mode] %s %s\n", dispatchFailedReason, dispatchFailedMessage) - go qjm.backoff(¤tAW, dispatchFailedReason, dispatchFailedMessage) + klog.Infof("[ScheduleNext] [Agent Mode] Dispatch next appwrapper: '%s/%s' Status=%v", qj.Namespace, qj.Name, qj.Status) } - } else { // Agent Mode - aggqj := qjm.GetAggregatedResources(¤tAW) - - // HeadOfLine logic - HOLStartTime := time.Now() - forwarded := false - fowardingLoopCount := 1 - quotaFits := false - // Try to forward to eventQueue for at most HeadOfLineHoldingTime - for !forwarded { - klog.Infof("[ScheduleNext] Forwarding loop iteration: %d", fowardingLoopCount) - priorityindex := currentAW.Status.SystemPriority - // Support for Non-Preemption - if !qjm.serverOption.Preemption { - priorityindex = -math.MaxFloat64 - } - // Disable Preemption under DynamicPriority. Comment out if allow DynamicPriority and Preemption at the same time. 
- if qjm.serverOption.DynamicPriority { - priorityindex = -math.MaxFloat64 - } - resources, proposedPreemptions := qjm.getAggregatedAvailableResourcesPriority( - qjm.cache.GetUnallocatedResources(), priorityindex, ¤tAW, "") - klog.Infof("[ScheduleNext] XQJ '%s/%s' with resources %v to be scheduled on aggregated idle resources %v", currentAW.Namespace, currentAW.Name, aggqj, resources) - - if aggqj.LessEqual(resources) && qjm.nodeChecks(qjm.cache.GetUnallocatedHistograms(), ¤tAW) { - // Now evaluate quota - fits := true - klog.V(4).Infof("[ScheduleNext] HOL available resourse successful check for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", - currentAW.Namespace, currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW, - currentAW.ResourceVersion, currentAW.Status) - if qjm.serverOption.QuotaEnabled { - if qjm.quotaManager != nil { - // Quota tree design: - // - All AppWrappers without quota submission will consume quota from the 'default' node. - // - All quota trees in the system should have a 'default' node so AppWrappers without - // quota specification can be dispatched - // - If the AppWrapper doesn't have a quota label, then one is added for every tree with the 'default' value - // - Depending on how the 'default' node is configured, AppWrappers that don't specify quota could be - // preemptable by default (e.g., 'default' node with 'cpu: 0m' and 'memory: 0Mi' quota and 'hardLimit: false' - // such node borrows quota from other nodes already in the system) - allTrees := qjm.quotaManager.GetValidQuotaLabels() - newLabels := make(map[string]string) - for key, value := range currentAW.Labels { - newLabels[key] = value - } - updateLabels := false - for _, treeName := range allTrees { - if _, quotaSetForAW := newLabels[treeName]; !quotaSetForAW { - newLabels[treeName] = "default" - updateLabels = true + + dispatchFailedReason := "AppWrapperNotRunnable." + dispatchFailedMessage := "" + if qjm.isDispatcher { // Dispatcher Mode + agentId := qjm.chooseAgent(qj) + if agentId != "" { // A proper agent is found. 
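// Editorial note (not part of the patch): any error returned from this branch
// propagates to the scheduleNextRetrier created near the top of ScheduleNext,
// which was built with retrier.ExponentialBackoff(10, 100*time.Millisecond), a
// 0.05 jitter, and the EtcdErrorClassifier added at the end of this file; only
// apiErrors.IsConflict errors are retried, any other failure ends the dispatch
// attempt and falls through to the re-queue handling after the retrier.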
+ // Update states (CanRun=True) of XQJ in API Server + // Add XQJ -> Agent Map + apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Dispatcher Mode] get appwrapper") + if retryErr != nil { + klog.Errorf("[ScheduleNext] [Dispatcher Mode] failed to retrieve the app wrapper '%s/%s', err=%#v", qj.Namespace, qj.Name, err) + return err + } + // make sure qj has the latest information + if larger(apiCacheAWJob.ResourceVersion, qj.ResourceVersion) { + klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] App wrapper '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) + klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] App wrapper '%s/%s' found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", apiCacheAWJob.Namespace, apiCacheAWJob.Name, apiCacheAWJob, apiCacheAWJob) + apiCacheAWJob.DeepCopyInto(qj) + } + qj.Status.CanRun = true + queueJobKey, _ := GetQueueJobKey(qj) + qjm.dispatchMap[queueJobKey] = agentId + klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] %s, %s: ScheduleNextBeforeEtcd", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + retryErr = qjm.updateStatusInEtcd(qj, "[ScheduleNext] [Dispatcher Mode] - setCanRun") + if retryErr != nil { + klog.Errorf("[ScheduleNext] [Dispatcher Mode] failed to update status in etcd the app wrapper '%s/%s', err=%#v", qj.Namespace, qj.Name, err) + return retryErr + } + if err := qjm.eventQueue.Add(qj); err != nil { // unsuccessful add to eventQueue, add back to activeQ + klog.Errorf("[ScheduleNext] [Dispatcher Mode] Fail to add %s to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) + qjm.qjqueue.MoveToActiveQueueIfExists(qj) + } else { // successful add to eventQueue, remove from qjqueue + if qjm.qjqueue.IfExist(qj) { + klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] AppWrapper %s will be deleted from priority queue and sent to event queue", qj.Name) + } + qjm.qjqueue.Delete(qj) + } + klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] %s, %s: ScheduleNextAfterEtcd", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + return nil + } else { + dispatchFailedMessage = "Cannot find an cluster with enough resources to dispatch AppWrapper." + klog.V(2).Infof("[ScheduleNex] [Dispatcher Mode] %s %s\n", dispatchFailedReason, dispatchFailedMessage) + go qjm.backoff(qj, dispatchFailedReason, dispatchFailedMessage) + } + } else { // Agent Mode + aggqj := qjm.GetAggregatedResources(qj) + + // HeadOfLine logic + HOLStartTime := time.Now() + forwarded := false + fowardingLoopCount := 1 + quotaFits := false + // Try to forward to eventQueue for at most HeadOfLineHoldingTime + for !forwarded { + klog.Infof("[ScheduleNext] [Agent Mode] Forwarding loop iteration: %d", fowardingLoopCount) + priorityindex := qj.Status.SystemPriority + // Support for Non-Preemption + if !qjm.serverOption.Preemption { + priorityindex = -math.MaxFloat64 + } + // Disable Preemption under DynamicPriority. Comment out if allow DynamicPriority and Preemption at the same time. 
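// Editorial note: together with the non-preemption guard above, forcing
// priorityindex to -math.MaxFloat64 means getAggregatedAvailableResourcesPriority
// treats no running AppWrapper as preemptable, so the head-of-line job has to fit
// entirely within currently idle resources. A rough sketch of the combined guard
// (illustrative only, not code from the patch):
//
//	priorityindex := qj.Status.SystemPriority
//	if !qjm.serverOption.Preemption || qjm.serverOption.DynamicPriority {
//		priorityindex = -math.MaxFloat64 // nothing is considered preemptable
//	}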
+ if qjm.serverOption.DynamicPriority { + priorityindex = -math.MaxFloat64 + } + resources, proposedPreemptions := qjm.getAggregatedAvailableResourcesPriority( + qjm.cache.GetUnallocatedResources(), priorityindex, qj, "") + klog.Infof("[ScheduleNext] [Agent Mode] Appwrapper '%s/%s' with resources %v to be scheduled on aggregated idle resources %v", qj.Namespace, qj.Name, aggqj, resources) + + if aggqj.LessEqual(resources) && qjm.nodeChecks(qjm.cache.GetUnallocatedHistograms(), qj) { + // Now evaluate quota + fits := true + klog.V(4).Infof("[ScheduleNext] [Agent Mode] available resourse successful check for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + if qjm.serverOption.QuotaEnabled { + if qjm.quotaManager != nil { + // Quota tree design: + // - All AppWrappers without quota submission will consume quota from the 'default' node. + // - All quota trees in the system should have a 'default' node so AppWrappers without + // quota specification can be dispatched + // - If the AppWrapper doesn't have a quota label, then one is added for every tree with the 'default' value + // - Depending on how the 'default' node is configured, AppWrappers that don't specify quota could be + // preemptable by default (e.g., 'default' node with 'cpu: 0m' and 'memory: 0Mi' quota and 'hardLimit: false' + // such node borrows quota from other nodes already in the system) + allTrees := qjm.quotaManager.GetValidQuotaLabels() + newLabels := make(map[string]string) + for key, value := range qj.Labels { + newLabels[key] = value } - } - if updateLabels { - currentAW.SetLabels(newLabels) - updatedAW, err := qjm.updateEtcd(¤tAW, "ScheduleNext - setDefaultQuota") - if err != nil { - klog.V(3).Infof("[ScheduleNext] Failed to added default quota to AW '%s/%s', skipping dispatch of AW", currentAW.Namespace, currentAW.Name) - return + updateLabels := false + for _, treeName := range allTrees { + if _, quotaSetForAW := newLabels[treeName]; !quotaSetForAW { + newLabels[treeName] = "default" + updateLabels = true + } } - klog.V(3).Infof("[ScheduleNext] Default quota added to AW '%s/%s'", currentAW.Namespace, currentAW.Name) - updatedAW.DeepCopyInto(¤tAW) - } - var msg string - var preemptAWs []*arbv1.AppWrapper - quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(¤tAW, aggqj, proposedPreemptions) - if quotaFits { - klog.Infof("[ScheduleNext] HOL quota evaluation successful '%s/%s' for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", currentAW.Namespace, - currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), ¤tAW, currentAW.ResourceVersion, currentAW.Status) - // Set any jobs that are marked for preemption - qjm.preemptAWJobs(preemptAWs) - } else { // Not enough free quota to dispatch appwrapper - dispatchFailedMessage = "Insufficient quota to dispatch AppWrapper." 
- if len(msg) > 0 { - dispatchFailedReason += " " - dispatchFailedReason += msg + if updateLabels { + tempAW, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Agent Mode] update labels") + if retryErr != nil { + klog.Warningf("[ScheduleNext] [Agent Mode] Failed to added get fresh copy of the app wrapper '%s/%s' to update quota lables, err = %v", qj.Namespace, qj.Name, retryErr) + return retryErr + } + tempAW.SetLabels(newLabels) + updatedAW, retryErr := qjm.updateEtcd(tempAW, "ScheduleNext [Agent Mode] - setDefaultQuota") + if retryErr != nil { + return retryErr + } + klog.Infof("[ScheduleNext] [Agent Mode] Default quota added to AW '%s/%s'", qj.Namespace, qj.Name) + updatedAW.DeepCopyInto(qj) } - klog.V(3).Infof("[ScheduleNext] HOL Blocking by '%s/%s' for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s, due to quota limits", currentAW.Namespace, - currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), ¤tAW, - currentAW.ResourceVersion, currentAW.Status, msg) + var msg string + var preemptAWs []*arbv1.AppWrapper + quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(qj, aggqj, proposedPreemptions) + if quotaFits { + klog.Infof("[ScheduleNext] HOL quota evaluation successful %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + // Set any jobs that are marked for preemption + qjm.preemptAWJobs(preemptAWs) + } else { // Not enough free quota to dispatch appwrapper + dispatchFailedMessage = "Insufficient quota to dispatch AppWrapper." + if len(msg) > 0 { + dispatchFailedReason += " " + dispatchFailedReason += msg + } + klog.V(3).Infof("[ScheduleNext] [Agent Mode] HOL Blocking by %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s, due to quota limits", + qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, msg) + } + fits = quotaFits + } else { + fits = false + //Quota manager not initialized + dispatchFailedMessage = "Quota evaluation is enabled but not initialized. Insufficient quota to dispatch AppWrapper." + klog.Errorf("[ScheduleNext] [Agent Mode] Quota evaluation is enabled but not initialized. AppWrapper '%s/%s' does not have enough quota", qj.Namespace, qj.Name) } - fits = quotaFits } else { - fits = false - //Quota manager not initialized - dispatchFailedMessage = "Quota evaluation is enabled but not initialized. Insufficient quota to dispatch AppWrapper." - klog.Errorf("[ScheduleNext] Quota evaluation is enabled but not initialized. 
AppWrapper %s/%s does not have enough quota", currentAW.Namespace, currentAW.Name) + klog.V(4).Infof("[ScheduleNext] [Agent Mode] HOL quota evaluation not enabled for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, + qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } - } else { - klog.V(4).Infof("[ScheduleNext] HOL quota evaluation not enabled for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", currentAW.Namespace, - currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW, currentAW.ResourceVersion, currentAW.Status) - } - // If quota evalauation sucedeed or quota evaluation not enabled set the appwrapper to be dispatched - if fits { - desired := int32(0) - for i, ar := range currentAW.Spec.AggrResources.Items { - desired += ar.Replicas - currentAW.Spec.AggrResources.Items[i].AllocatedReplicas = ar.Replicas - } - currentAW.Status.CanRun = true - currentAW.Status.FilterIgnore = true // update CanRun & Spec. no need to trigger event - // Handle k8s watch race condition - updatedAW, err := qjm.updateEtcd(¤tAW, "ScheduleNext - setCanRun") - if err != nil { - //updateEtcd failed - klog.Errorf("[ScheduleNext] failed to update in etcd the app wrapper '%s/%s', err=%#v", currentAW.Namespace, currentAW.Name, err) - return + // If quota evalauation sucedeed or quota evaluation not enabled set the appwrapper to be dispatched + if fits { + // aw is ready to go! + tempAW, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Agent Mode] -- ready to dispatch") + if retryErr != nil { + klog.Errorf("[ScheduleNext] [Agent Mode] Failed to get fresh copy of the app wrapper '%s/%s' to update status, err = %v", qj.Namespace, qj.Name, err) + return retryErr + } + desired := int32(0) + for i, ar := range tempAW.Spec.AggrResources.Items { + desired += ar.Replicas + tempAW.Spec.AggrResources.Items[i].AllocatedReplicas = ar.Replicas + } + + tempAW.Status.CanRun = true + tempAW.Status.FilterIgnore = true // update CanRun & Spec. no need to trigger event + retryErr = qjm.updateStatusInEtcd(tempAW, "ScheduleNext - setCanRun") + if retryErr != nil { + if qjm.quotaManager != nil && quotaFits { + //Quota was allocated for this appwrapper, release it. + qjm.quotaManager.Release(qj) + } + return retryErr + } + tempAW.DeepCopyInto(qj) + // add to eventQueue for dispatching to Etcd + if err = qjm.eventQueue.Add(qj); err != nil { // unsuccessful add to eventQueue, add back to activeQ + klog.Errorf("[ScheduleNext] [Agent Mode] Fail to add '%s/%s' to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Namespace, + qj.Name, qj, qj.ResourceVersion, qj.Status, err) + qjm.qjqueue.MoveToActiveQueueIfExists(qj) + } else { // successful add to eventQueue, remove from qjqueue + qjm.qjqueue.Delete(qj) + forwarded = true + klog.V(4).Infof("[ScheduleNext] [Agent Mode]'%s/%s' Delay=%.6f seconds eventQueue.Add_afterHeadOfLine activeQ=%t, Unsched=%t &aw=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.IfExistActiveQ(qj), + qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + } + } //fits + } else { // Not enough free resources to dispatch HOL + dispatchFailedMessage = "Insufficient resources to dispatch AppWrapper." 
+ klog.V(4).Infof("[ScheduleNext] [Agent Mode] HOL Blocking by '%s/%s' for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), + qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + } + // if the HeadOfLineHoldingTime option is not set it will break the loop + schedulingTimeExpired := time.Now().After(HOLStartTime.Add(time.Duration(qjm.serverOption.HeadOfLineHoldingTime) * time.Second)) + if forwarded { + break + } else if schedulingTimeExpired { + // stop trying to dispatch after HeadOfLineHoldingTime + // release quota if allocated + if qjm.quotaManager != nil && quotaFits { + //Quota was allocated for this appwrapper, release it. + qjm.quotaManager.Release(qj) } - updatedAW.DeepCopyInto(¤tAW) - // add to eventQueue for dispatching to Etcd - if err = qjm.eventQueue.Add(¤tAW); err != nil { // unsuccessful add to eventQueue, add back to activeQ - klog.Errorf("[ScheduleNext] Fail to add '%s/%s' to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", currentAW.Namespace, - currentAW.Name, currentAW, currentAW.ResourceVersion, currentAW.Status, err) - qjm.qjqueue.MoveToActiveQueueIfExists(¤tAW) - } else { // successful add to eventQueue, remove from qjqueue - qjm.qjqueue.Delete(¤tAW) - forwarded = true - klog.V(4).Infof("[ScheduleNext] '%s/%s' Delay=%.6f seconds eventQueue.Add_afterHeadOfLine activeQ=%t, Unsched=%t &aw=%p Version=%s Status=%+v", - currentAW.Namespace, currentAW.Name, time.Now().Sub(currentAW.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.IfExistActiveQ(¤tAW), - qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW, currentAW.ResourceVersion, currentAW.Status) + break + } else { // Try to dispatch again after one second + if qjm.quotaManager != nil && quotaFits { + //release any quota as the qj will be tried again and the quota might have been allocated. + qjm.quotaManager.Release(qj) } - } //fits - } else { // Not enough free resources to dispatch HOL - dispatchFailedMessage = "Insufficient resources to dispatch AppWrapper." - klog.V(4).Infof("[ScheduleNext] HOL Blocking by '%s/%s' for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - currentAW.Namespace, currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), - qjm.qjqueue.IfExistUnschedulableQ(¤tAW), currentAW, currentAW.ResourceVersion, currentAW.Status) - } - schedulingTimeExpired := (qjm.serverOption.HeadOfLineHoldingTime > 0) && time.Now().After(HOLStartTime.Add(time.Duration(qjm.serverOption.HeadOfLineHoldingTime)*time.Second)) - if forwarded { - break - } else if schedulingTimeExpired { - // stop trying to dispatch after HeadOfLineHoldingTime - // release quota if allocated - if qjm.quotaManager != nil && quotaFits { - //Quota was allocated for this appwrapper, release it. - qjm.quotaManager.Release(¤tAW) + time.Sleep(time.Second * 1) } - break - } else { // Try to dispatch again after one second + fowardingLoopCount += 1 + } + if !forwarded { // start thread to backoff + klog.V(3).Infof("[ScheduleNext][Agent Mode] HOL backoff %s after waiting for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) if qjm.quotaManager != nil && quotaFits { - //release any quota as the qj will be tried again and the quota might have been allocated. 
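// Editorial note: a successful quotaManager.Fits call allocates quota for this
// AppWrapper (see the "Quota was allocated for this appwrapper, release it"
// comments nearby), so every exit from a forwarding iteration that does not
// actually dispatch it, whether giving up after HeadOfLineHoldingTime or looping
// for another one-second attempt, must call Release; otherwise the reservation
// leaks and shrinks the quota visible to later dispatch attempts.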
- qjm.quotaManager.Release(¤tAW) + qjm.quotaManager.Release(qj) } - time.Sleep(time.Second * 1) + go qjm.backoff(qj, dispatchFailedReason, dispatchFailedMessage) } - fowardingLoopCount += 1 - } - if !forwarded { // start thread to backoff - klog.V(3).Infof("[ScheduleNext] HOL backoff '%s/%s' after waiting for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", currentAW.Namespace, - currentAW.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(¤tAW), qjm.qjqueue.IfExistUnschedulableQ(¤tAW), ¤tAW, currentAW.ResourceVersion, currentAW.Status) - if qjm.quotaManager != nil && quotaFits { - qjm.quotaManager.Release(¤tAW) - } - go qjm.backoff(¤tAW, dispatchFailedReason, dispatchFailedMessage) } + return nil + }) + if apiErrors.IsNotFound(err) { + klog.Warningf("[ScheduleNext] app wrapper '%s/%s' not found skiping dispatch", qj.Namespace, qj.Name) + return + } + if err != nil { + klog.Warningf("[ScheduleNext] failed to dispatch the app wrapper '%s/%s', err= %v", qj.Namespace, qj.Name, err) + klog.Warningf("[ScheduleNext] retrying dispatch") + qjm.qjqueue.AddIfNotPresent(qj) } } @@ -1389,16 +1432,11 @@ func (cc *XController) updateEtcd(currentAppwrapper *arbv1.AppWrapper, caller st currentAppwrapper.Status.Local = false // for Informer FilterFunc to pickup updatedAppwrapper, err := cc.arbclients.ArbV1().AppWrappers(currentAppwrapper.Namespace).Update(currentAppwrapper) if err != nil { - if apiErrors.IsNotFound(err) { - klog.Warningf("[updateEtcd] app wrapper '%s/%s' not found when called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) - } else if apiErrors.IsConflict(err) { - klog.Warningf("[updateEtcd] app wrapper '%s/%s' update version conflict called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) - } - klog.Errorf("[updateEtcd] Failed to update status of AppWrapper '%s/%s' called by '%s', err=%v", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, err) return nil, err } - if larger(currentAppwrapper.ResourceVersion, updatedAppwrapper.ResourceVersion) { - klog.Warningf("[updateEtcd] updated app wrapper '%s/%s' called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) + if larger(updatedAppwrapper.ResourceVersion, currentAppwrapper.ResourceVersion) { + klog.Warningf("[updateEtcd] current app wrapper '%s/%s' called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, currentAppwrapper.ResourceVersion) + klog.Warningf("[updateEtcd] updated app wrapper '%s/%s' called by '%s' has version %s", updatedAppwrapper.Namespace, updatedAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) } klog.V(4).Infof("[updateEtcd] update success '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) @@ -1407,20 +1445,14 @@ func (cc *XController) updateEtcd(currentAppwrapper *arbv1.AppWrapper, caller st func (cc *XController) updateStatusInEtcd(currentAppwrapper *arbv1.AppWrapper, caller string) error { klog.V(4).Infof("[updateStatusInEtcd] trying to update '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + currentAppwrapper.Status.Sender = "before " + caller // set Sender string to indicate code location updatedAppwrapper, err := cc.arbclients.ArbV1().AppWrappers(currentAppwrapper.Namespace).UpdateStatus(currentAppwrapper) if err != nil { - if apiErrors.IsNotFound(err) { - klog.Warningf("[updateEtcd] app wrapper '%s/%s' not found when called by '%s'", 
currentAppwrapper.Namespace, currentAppwrapper.Name, caller) - panic(err) - } else if apiErrors.IsConflict(err) { - klog.Warningf("[updateEtcd] app wrapper '%s/%s' update version conflig called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) - panic(err) - } - klog.Errorf("[updateStatusInEtcd] Failed to update status of '%s/%s' called by '%s', err=%v", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, err) return err } - if larger(currentAppwrapper.ResourceVersion, updatedAppwrapper.ResourceVersion) { - klog.Warningf("[updateEtcd] updated app wrapper '%s/%s' not found when called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) + if larger(updatedAppwrapper.ResourceVersion, currentAppwrapper.ResourceVersion) { + klog.Warningf("[updateStatusInEtcd] current app wrapper '%s/%s' called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, currentAppwrapper.ResourceVersion) + klog.Warningf("[updateStatusInEtcd] updated app wrapper '%s/%s' called by '%s' has version %s", updatedAppwrapper.Namespace, updatedAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) } klog.V(10).Infof("[updateStatusInEtcd] update success '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) return nil @@ -1529,30 +1561,35 @@ func (qjm *XController) addOrUpdateCondition(aw *arbv1.AppWrapper, condType arbv } func (qjm *XController) backoff(q *arbv1.AppWrapper, reason string, message string) { - var workingAW *arbv1.AppWrapper - apiCacheAWJob, e := qjm.queueJobLister.AppWrappers(q.Namespace).Get(q.Name) - // Update condition - if e == nil { - workingAW = apiCacheAWJob + + etcUpdateRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) + err := etcUpdateRetrier.Run(func() error { + apiCacheAWJob, err := qjm.getAppWrapper(q.Namespace, q.Name, "[backoff] - Rejoining") + if err != nil { + return err + } apiCacheAWJob.Status.QueueJobState = arbv1.AppWrapperCondBackoff - workingAW.Status.FilterIgnore = true // update QueueJobState only, no work needed - qjm.addOrUpdateCondition(workingAW, arbv1.AppWrapperCondBackoff, v1.ConditionTrue, reason, message) - if err := qjm.updateStatusInEtcd(workingAW, "backoff - Rejoining"); err != nil { - klog.Errorf("[backoff] Failed to updated AW status in etcd '%s/%s'. Continuing with possible stale object without updating conditions.", workingAW.Namespace, workingAW.Name) + apiCacheAWJob.Status.FilterIgnore = true // update QueueJobState only, no work needed + // Update condition + qjm.addOrUpdateCondition(apiCacheAWJob, arbv1.AppWrapperCondBackoff, v1.ConditionTrue, reason, message) + if err := qjm.updateStatusInEtcd(apiCacheAWJob, "[backoff] - Rejoining"); err != nil { + klog.Warningf("[backoff] Failed to updated AW status in etcd '%s/%s'. Continuing with possible stale object without updating conditions. Retrying.", apiCacheAWJob.Namespace, apiCacheAWJob.Name) + return err } - } else { - workingAW = q - klog.Errorf("[backoff] Failed to retrieve cached object for %s/%s. Continuing with possible stale object without updating conditions.", workingAW.Namespace, workingAW.Name) - + apiCacheAWJob.DeepCopyInto(q) + return nil + }) + if err != nil { + klog.Errorf("[backoff] Failed to retrieve cached object for %s/%s. 
Continuing with possible stale object without updating conditions.", q.Namespace, q.Name) } - qjm.qjqueue.AddUnschedulableIfNotPresent(workingAW) - klog.V(3).Infof("[backoff] %s move to unschedulableQ before sleep for %d seconds. activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", workingAW.Name, - qjm.serverOption.BackoffTime, qjm.qjqueue.IfExistActiveQ((workingAW)), qjm.qjqueue.IfExistUnschedulableQ((workingAW)), workingAW, workingAW.ResourceVersion, workingAW.Status) + qjm.qjqueue.AddUnschedulableIfNotPresent(q) + klog.V(3).Infof("[backoff] %s move to unschedulableQ before sleep for %d seconds. activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", q.Name, + qjm.serverOption.BackoffTime, qjm.qjqueue.IfExistActiveQ(q), qjm.qjqueue.IfExistUnschedulableQ(q), q, q.ResourceVersion, q.Status) time.Sleep(time.Duration(qjm.serverOption.BackoffTime) * time.Second) - qjm.qjqueue.MoveToActiveQueueIfExists(workingAW) + qjm.qjqueue.MoveToActiveQueueIfExists(q) - klog.V(3).Infof("[backoff] %s activeQ.Add after sleep for %d seconds. activeQ=%t Unsched=%t &aw=%p Version=%s Status=%+v", workingAW.Name, - qjm.serverOption.BackoffTime, qjm.qjqueue.IfExistActiveQ((workingAW)), qjm.qjqueue.IfExistUnschedulableQ((workingAW)), workingAW, workingAW.ResourceVersion, workingAW.Status) + klog.V(3).Infof("[backoff] %s activeQ.Add after sleep for %d seconds. activeQ=%t Unsched=%t &aw=%p Version=%s Status=%+v", q.Name, + qjm.serverOption.BackoffTime, qjm.qjqueue.IfExistActiveQ(q), qjm.qjqueue.IfExistUnschedulableQ(q), q, q.ResourceVersion, q.Status) } // Run start AppWrapper Controller @@ -1587,7 +1624,7 @@ func (cc *XController) Run(stopCh chan struct{}) { go wait.Until(cc.PreemptQueueJobs, 60*time.Second, stopCh) // This thread is used as a heartbeat to calculate runtime spec in the status - //go wait.Until(cc.UpdateQueueJobs, 5*time.Second, stopCh) + go wait.Until(cc.UpdateQueueJobs, 5*time.Second, stopCh) if cc.isDispatcher { go wait.Until(cc.UpdateAgent, 2*time.Second, stopCh) // In the Agent? @@ -1702,9 +1739,9 @@ func (cc *XController) updateQueueJob(oldObj, newObj interface{}) { return } // AppWrappers may come out of order. Ignore old ones. 
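// Editorial note: the updated check below also compares namespaces, since an
// AppWrapper name is only unique within its namespace; combined with the larger()
// helper on ResourceVersion, it drops informer events that carry an older version
// of an object than one already processed instead of overwriting newer state.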
- if (oldQJ.Name == newQJ.Name) && (larger(oldQJ.ResourceVersion, newQJ.ResourceVersion)) { - klog.V(4).Infof("[Informer-updateQJ] %s ignored OutOfOrder arrival &oldQJ=%p oldQJ=%+v", oldQJ.Name, oldQJ, oldQJ) - klog.V(4).Infof("[Informer-updateQJ] %s ignored OutOfOrder arrival &newQJ=%p newQJ=%+v", newQJ.Name, newQJ, newQJ) + if (oldQJ.Namespace == newQJ.Namespace) && (oldQJ.Name == newQJ.Name) && (larger(oldQJ.ResourceVersion, newQJ.ResourceVersion)) { + klog.V(4).Infof("[Informer-updateQJ] '%s/%s' ignored OutOfOrder arrival &oldQJ=%p oldQJ=%+v", oldQJ.Namespace, oldQJ.Name, oldQJ, oldQJ) + klog.V(4).Infof("[Informer-updateQJ] '%s/%s' ignored OutOfOrder arrival &newQJ=%p newQJ=%+v", newQJ.Namespace, newQJ.Name, newQJ, newQJ) return } @@ -1830,23 +1867,23 @@ func (cc *XController) updateQueueJobStatus(queueJobFromAgent *arbv1.AppWrapper) } func (cc *XController) worker() { - // defer func() { - // if pErr := recover(); pErr != nil { - // klog.Errorf("[worker] Panic occurred error: %v, stacktrace: %s", pErr, string(debug.Stack())) - // } - // }() - if _, err := cc.eventQueue.Pop(func(obj interface{}) error { - var appWrapper *arbv1.AppWrapper + defer func() { + if pErr := recover(); pErr != nil { + klog.Errorf("[worker] Panic occurred error: %v, stacktrace: %s", pErr, string(debug.Stack())) + } + }() + item, err := cc.eventQueue.Pop(func(obj interface{}) error { + var queuejob *arbv1.AppWrapper switch v := obj.(type) { case *arbv1.AppWrapper: - appWrapper = v + queuejob = v default: klog.Errorf("[worker] eventQueue.Pop un-supported type. obj=%+v", obj) return nil } - klog.V(10).Infof("[worker] %s *Delay=%.6f seconds eventQueue.Pop_begin &newQJ=%p Version=%s Status=%+v", appWrapper.Name, time.Now().Sub(appWrapper.Status.ControllerFirstTimestamp.Time).Seconds(), appWrapper, appWrapper.ResourceVersion, appWrapper.Status) + klog.V(10).Infof("[worker] '%s/%s' *Delay=%.6f seconds eventQueue.Pop_begin &newQJ=%p Version=%s Status=%+v", queuejob.Namespace, queuejob.Name, time.Now().Sub(queuejob.Status.ControllerFirstTimestamp.Time).Seconds(), queuejob, queuejob.ResourceVersion, queuejob.Status) - if appWrapper == nil { + if queuejob == nil { if acc, err := meta.Accessor(obj); err != nil { klog.Warningf("[worker] Failed to get AppWrapper for %v/%v", acc.GetNamespace(), acc.GetName()) } @@ -1854,46 +1891,58 @@ func (cc *XController) worker() { return nil } - if err := cc.syncQueueJob(appWrapper); err != nil { - klog.Errorf("[worker] Failed to sync AppWrapper %s, err %#v", appWrapper.Name, err) + // sync AppWrapper + if err := cc.syncQueueJob(queuejob); err != nil { + klog.Errorf("[worker] Failed to sync AppWrapper '%s/%s', err %#v", queuejob.Namespace, queuejob.Name, err) // If any error, requeue it. return err } - klog.V(10).Infof("[worker] Ending %s Delay=%.6f seconds &newQJ=%p Version=%s Status=%+v", appWrapper.Name, time.Now().Sub(appWrapper.Status.ControllerFirstTimestamp.Time).Seconds(), appWrapper, appWrapper.ResourceVersion, appWrapper.Status) + klog.V(10).Infof("[worker] Ending %s Delay=%.6f seconds &newQJ=%p Version=%s Status=%+v", queuejob.Name, time.Now().Sub(queuejob.Status.ControllerFirstTimestamp.Time).Seconds(), queuejob, queuejob.ResourceVersion, queuejob.Status) return nil - }); err != nil { - klog.Errorf("[worker] Fail to pop item from eventQueue, err %#v", err) + }) + if err != nil { + klog.Warningf("[worker] Fail to process item from eventQueue, err %v. 
Attempting to re-enqueue...", err)
+		if err00 := cc.enqueueIfNotPresent(item); err00 != nil {
+			klog.Errorf("[worker] Failed to re-enqueue item, err %v", err00)
+		}
+		klog.Warningf("[worker] Item re-enqueued")
 		return
 	}
 }
 
-func (cc *XController) syncQueueJob(queueAW *arbv1.AppWrapper) error {
-	// get the newest version of the appwrapper if available
-	var currentAW arbv1.AppWrapper
-	if err := cc.getAppWrapper(queueAW.Namespace, queueAW.Name, &currentAW); err != nil {
-		if apiErrors.IsNotFound(err) {
-			klog.Errorf("[syncQueueJob] Deleting AppWrapper '%s/%s' since it was not found in cache", queueAW.Namespace, queueAW.Name)
-			cc.Cleanup(queueAW)
-			cc.qjqueue.Delete(queueAW)
+func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error {
+	cacheAWJob, err := cc.getAppWrapper(qj.Namespace, qj.Name, "[syncQueueJob] get fresh appwrapper ")
+	if err != nil {
+		// Implicit detection of deletion
+		if apiErrors.IsNotFound(err) {
+			klog.V(4).Infof("[syncQueueJob] AppWrapper %s not found in cache: info=%+v", qj.Name, err)
+			cc.Cleanup(qj)
+			cc.qjqueue.Delete(qj)
 			return nil
 		}
-		klog.Errorf("[syncQueueJob] Failed to get fresh copy of appwrapper AppWrapper '%s/%s', err %v", currentAW.Namespace, currentAW.Name, err)
+		klog.Errorf("[syncQueueJob] Failed to get a fresh copy of AppWrapper '%s/%s', err %v", qj.Namespace, qj.Name, err)
 		return err
 	}
+	klog.V(10).Infof("[syncQueueJob] Cache AW %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status)
-	klog.V(4).Infof("[syncQueueJob] Cache AW '%s/%s' Version=%s Status=%+v", currentAW.Namespace, currentAW.Name, currentAW.ResourceVersion, currentAW.Status)
+	// make sure qj has the latest information
+	if larger(cacheAWJob.ResourceVersion, qj.ResourceVersion) {
+		klog.V(4).Infof("[syncQueueJob] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj)
+		klog.V(4).Infof("[syncQueueJob] '%s/%s' found more recent copy from cache &cacheAWJob=%p cacheAWJob=%+v", cacheAWJob.Namespace, cacheAWJob.Name, cacheAWJob, cacheAWJob)
+		cacheAWJob.DeepCopyInto(qj)
+	}
 
 	// If it is Agent (not a dispatcher), update pod information
 	podPhaseChanges := false
-	if !cc.isDispatcher {
+	if !cc.isDispatcher { // agent mode
 		//Make a copy first to not update cache object and to use for comparing
-		awNew := currentAW.DeepCopy()
+		awNew := qj.DeepCopy()
 		// we call sync to update pods running, pending,...
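// Editorial note: the sync below runs against awNew, a deep copy of the popped
// object, so pod counts are filled in without mutating the object shared with the
// informer cache; when the counts justify it, a PodsRunning condition is appended
// and, in the updated code, only the status subresource is written back via
// updateStatusInEtcd.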
- if currentAW.Status.State == arbv1.AppWrapperStateActive { + if qj.Status.State == arbv1.AppWrapperStateActive { err := cc.qjobResControls[arbv1.ResourceTypePod].UpdateQueueJobStatus(awNew) if err != nil { - klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: %s, err=%+v", currentAW.Name, err) + klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: %s, err=%+v", qj.Name, err) return err } klog.V(10).Infof("[syncQueueJob] AW popped from event queue %s &qj=%p Version=%s Status=%+v", awNew.Name, awNew, awNew.ResourceVersion, awNew.Status) @@ -1904,28 +1953,27 @@ func (cc *XController) syncQueueJob(queueAW *arbv1.AppWrapper) error { cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunning, v1.ConditionTrue, "PodsRunning", "") awNew.Status.Conditions = append(awNew.Status.Conditions, cond) awNew.Status.FilterIgnore = true // Update AppWrapperCondRunning - updateAW, err := cc.updateEtcd(awNew, "[syncQueueJob] setRunning") + err := cc.updateStatusInEtcd(awNew, "[syncQueueJob] setRunning") if err != nil { - klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: %s, err=%+v", currentAW.Name, err) + klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: '%s/%s', err=%+v", qj.Namespace, qj.Name, err) + return err } - //save the updated AW state for later use - updateAW.DeepCopyInto(¤tAW) } //For debugging? - if !reflect.DeepEqual(awNew.Status, currentAW.Status) { + if !reflect.DeepEqual(awNew.Status, qj.Status) { podPhaseChanges = true // Using DeepCopy before DeepCopyInto as it seems that DeepCopyInto does not alloc a new memory object - // awNewStatus := awNew.Status.DeepCopy() - // awNewStatus.DeepCopyInto(¤tAW.Status) - //awNew.Status.DeepCopy().DeepCopyInto(&qj.Status) - klog.V(10).Infof("[syncQueueJob] AW pod phase change(s) detected %s &eventqueueaw=%p eventqueueawVersion=%s eventqueueawStatus=%+v; &newaw=%p newawVersion=%s newawStatus=%+v", - currentAW.Name, currentAW, currentAW.ResourceVersion, currentAW.Status, awNew, awNew.ResourceVersion, awNew.Status) + awNewStatus := awNew.Status.DeepCopy() + awNewStatus.DeepCopyInto(&qj.Status) + klog.V(4).Infof("[syncQueueJob] AW pod phase change(s) detected '%s/%s' &eventqueueaw=%p eventqueueawVersion=%s eventqueueawStatus=%+v; &newaw=%p newawVersion=%s newawStatus=%+v", + qj.Namespace, qj.Name, qj, qj.ResourceVersion, qj.Status, awNew, awNew.ResourceVersion, awNew.Status) } } } - return cc.manageQueueJob(¤tAW, podPhaseChanges) + err = cc.manageQueueJob(qj, podPhaseChanges) + return err } // manageQueueJob is the core method responsible for managing the number of running @@ -1933,14 +1981,11 @@ func (cc *XController) syncQueueJob(queueAW *arbv1.AppWrapper) error { // Does NOT modify . 
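// Editorial summary (not in the original comment): in agent mode manageQueueJob
// walks an AppWrapper through cleanup on deletion, the Queueing and HeadOfLine
// states, dispatch once CanRun is set, and finally Completed or
// RunningHoldCompletion based on pod results; in dispatcher mode it instead
// forwards a CanRun AppWrapper to the agent recorded in dispatchMap.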
func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool) error { var err error - // startTime := time.Now() - // defer func() { - // klog.V(10).Infof("[worker-manageQJ] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status) - // }() - if qj == nil { - klog.Error("[worker-manageQJ] A nill appwrapper was passed in") - return fmt.Errorf("a nill aws is not allowed in call to manageQueueJob") - } + startTime := time.Now() + defer func() { + klog.V(10).Infof("[manageQueueJob] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status) + }() + if !cc.isDispatcher { // Agent Mode if qj.DeletionTimestamp != nil { @@ -1969,9 +2014,9 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool if podPhaseChanges { // Only update etcd if AW status has changed. This can happen for periodic // updates of pod phase counts done in caller of this function. - if _, err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { + if err := cc.updateStatusInEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) - // return err + return err } } return nil @@ -1997,46 +2042,46 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Status.State = arbv1.AppWrapperStateEnqueued // add qj to qjqueue only when it is not in UnschedulableQ if cc.qjqueue.IfExistUnschedulableQ(qj) { - klog.V(10).Infof("[worker-manageQJ] leaving %s to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(10).Infof("[manageQueueJob] leaving %s to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + return nil + } + + klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondQueueing, "AwaitingHeadOfLine") + if index < 0 { + qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") + qj.Status.Conditions = append(qj.Status.Conditions, cond) } else { - klog.V(10).Infof("[worker-manageQJ] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondQueueing, "AwaitingHeadOfLine") - if index < 0 { - qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") - qj.Status.Conditions = append(qj.Status.Conditions, cond) - } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") - qj.Status.Conditions[index] = *cond.DeepCopy() - } + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") 
+			qj.Status.Conditions[index] = *cond.DeepCopy()
+		}
-			qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext
-			updatedAW, err := cc.updateEtcd(qj, "manageQueueJob - setQueueing")
-			if err != nil {
-				klog.Errorf("[manageQueueJob] Failed to updated etcd for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err)
-				return err
-			}
-			updatedAW.DeepCopyInto(qj)
-			klog.V(10).Infof("[worker-manageQJ] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status)
-			if err = cc.qjqueue.AddIfNotPresent(qj); err != nil {
-				klog.Errorf("[worker-manageQJ] Fail to add %s to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v",
-					qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err)
-				cc.enqueue(qj)
-			} else {
-				klog.V(3).Infof("[worker-manageQJ] %s 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v",
-					qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status)
-			}
+		qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext
+		err := cc.updateStatusInEtcd(qj, "manageQueueJob - setQueueing")
+		if err != nil {
+			klog.Errorf("[manageQueueJob] Failed to update etcd for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err)
+			return err
+		}
+		klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status)
+		if err = cc.qjqueue.AddIfNotPresent(qj); err != nil {
+			klog.Errorf("[manageQueueJob] Fail to add %s to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v",
+				qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err)
+			cc.enqueue(qj)
+		} else {
+			klog.V(3).Infof("[worker-manageQJ] %s 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v",
+				qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status)
 		}
 		return nil
 	} // End of first execution of qj to add to qjqueue for ScheduleNext
 	//Handle recovery condition
-	if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued &&
-		!cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) {
+	// FIXME: there is still a race condition here.
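// Editorial note: the window appears to be that an AppWrapper can be absent from
// both the active and unschedulable queues precisely because ScheduleNext has just
// popped it and is still working on it; re-queueing it here could then lead to a
// duplicate dispatch attempt. The schedulingAW check below is meant to catch that
// case, but the state can still change between the two checks, hence the FIXME.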
+ if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { // One more check to ensure AW is not the current active schedule object if cc.schedulingAW.IsActiveAppWrapper(qj.Name, qj.Namespace) { cc.qjqueue.AddIfNotPresent(qj) - klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper %s%s - added to active queue, Status=%+v", + klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper '%s/%s' - added to active queue, Status=%+v", qj.Namespace, qj.Name, qj.Status) return nil } @@ -2113,14 +2158,12 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool cc.Cleanup(qj) } - // TODO(k82cn): replaced it with `UpdateStatus` qj.Status.FilterIgnore = true // update State & QueueJobState after dispatch - updatedAW, err := cc.updateEtcd(qj, "manageQueueJob - afterEtcdDispatching") + err := cc.updateStatusInEtcd(qj, "manageQueueJob - afterEtcdDispatching") if err != nil { klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) return err } - updatedAW.DeepCopyInto(qj) } else if qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateActive { //set appwrapper status to Complete or RunningHoldCompletion @@ -2143,7 +2186,7 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Status.Conditions[index] = *cond.DeepCopy() updateQj = qj.DeepCopy() } - if _, err := cc.updateEtcd(updateQj, "[syncQueueJob] setRunningHoldCompletion"); err != nil { + if err := cc.updateStatusInEtcd(updateQj, "[syncQueueJob] setRunningHoldCompletion"); err != nil { klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) return err } @@ -2152,33 +2195,39 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool if derivedAwStatus == arbv1.AppWrapperStateCompleted { qj.Status.State = derivedAwStatus qj.Status.CanRun = false - updateQj := qj.DeepCopy() + var updateQj *arbv1.AppWrapper index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondCompleted, "PodsCompleted") if index < 0 { qj.Status.QueueJobState = arbv1.AppWrapperCondCompleted cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") qj.Status.Conditions = append(qj.Status.Conditions, cond) qj.Status.FilterIgnore = true // Update AppWrapperCondCompleted + updateQj = qj.DeepCopy() } else { cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") qj.Status.Conditions[index] = *cond.DeepCopy() + updateQj = qj.DeepCopy() } - updatedAW, err := cc.updateEtcd(updateQj, "[syncQueueJob] setCompleted") + err := cc.updateStatusInEtcd(updateQj, "[syncQueueJob] setCompleted") if err != nil { - cc.quotaManager.Release(updateQj) + if cc.quotaManager != nil { + cc.quotaManager.Release(updateQj) + } klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) return err } - updatedAW.DeepCopyInto(qj) - cc.quotaManager.Release(updateQj) + if cc.quotaManager != nil { + cc.quotaManager.Release(updateQj) + } } // Bugfix to eliminate performance problem of overloading the event queue. } else if podPhaseChanges { // Continued bug fix // Only update etcd if AW status has changed. This can happen for periodic // updates of pod phase counts done in caller of this function. 
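// Editorial note: this patch consistently replaces updateEtcd with
// updateStatusInEtcd for writes that only touch .status; that pairs with the CRD
// change above that enables the status subresource (subresources: status: {}), so
// a status write no longer risks clobbering spec or metadata. A minimal sketch of
// the distinction, using the generated client already used in this file
// (illustrative only, not code from the patch):
//
//	// full-object update: persists spec, metadata and status together
//	updatedAW, err := cc.arbclients.ArbV1().AppWrappers(aw.Namespace).Update(aw)
//
//	// status-only update: routed to the status subresource enabled in the CRD
//	updatedAW, err = cc.arbclients.ArbV1().AppWrappers(aw.Namespace).UpdateStatus(aw)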
- if _, err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { + if err := cc.updateStatusInEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) + return err } } // Finish adding qj to Etcd for dispatch @@ -2219,7 +2268,9 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool klog.V(10).Infof("[worker-manageQJ] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext - cc.updateEtcd(qj, "manageQueueJob - setQueueing") + if err := cc.updateStatusInEtcd(qj, "manageQueueJob - setQueueing"); err != nil { + return err + } if err = cc.qjqueue.AddIfNotPresent(qj); err != nil { klog.Errorf("[worker-manageQJ] Fail to add %s to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err) @@ -2229,44 +2280,36 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } } - - //_, err = cc.arbclients.ArbV1().AppWrappers(qj.Namespace).Update(qj) - //if err != nil { - // return err - //} return nil } - - // if !qj.Status.CanRun && qj.Status.State == arbv1.QueueJobStateEnqueued { if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued { cc.qjqueue.AddIfNotPresent(qj) return nil } - if qj.Status.CanRun && !qj.Status.IsDispatched { if klog.V(10).Enabled() { current_time := time.Now() klog.V(10).Infof("[worker-manageQJ] XQJ %s has Overhead Before Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) - klog.V(10).Infof("[TTime] %s, %s: WorkerBeforeDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[worker-manageQJ] %s, %s: WorkerBeforeDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) } queuejobKey, _ := GetQueueJobKey(qj) - // agentId:=cc.dispatchMap[queuejobKey] - // if agentId!=nil { - if agentId, ok := cc.dispatchMap[queuejobKey]; ok { - klog.V(10).Infof("[Dispatcher Controller] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId) - cc.agentMap[agentId].CreateJob(qj) - qj.Status.IsDispatched = true - } else { - klog.Errorf("[Dispatcher Controller] AppWrapper %s not found in dispatcher mapping.", qj.Name) + if cc.isDispatcher { + if agentId, ok := cc.dispatchMap[queuejobKey]; ok { + klog.V(10).Infof("[worker-manageQJ] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId) + cc.agentMap[agentId].CreateJob(qj) + qj.Status.IsDispatched = true + } else { + klog.Errorf("[worker-manageQJ] AppWrapper %s not found in dispatcher mapping.", qj.Name) + } } if klog.V(10).Enabled() { current_time := time.Now() - klog.V(10).Infof("[Dispatcher Controller] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) - klog.V(10).Infof("[TTime] %s, %s: WorkerAfterDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[worker-manageQJ] XQJ %s has Overhead After Dispatching: %s", 
qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[worker-manageQJ] %s, %s: WorkerAfterDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) } - - if _, err := cc.arbclients.ArbV1().AppWrappers(qj.Namespace).Update(qj); err != nil { + //FIXME use the updateStatusInEtcd + if _, err := cc.updateEtcd(qj, "[worker-manageQJ] -- set dispatched true"); err != nil { klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", qj.Namespace, qj.Name, err) return err @@ -2328,31 +2371,27 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { return nil } -func (cc *XController) refreshAppWrapper(appwrapper *arbv1.AppWrapper) error { - klog.V(4).Infof("[refreshAppWrapper] geting fresh copy of '%s/%s'", appwrapper.Namespace, appwrapper.Name) - apiCacheAWJob, err := cc.queueJobLister.AppWrappers(appwrapper.Namespace).Get(appwrapper.Name) +func (cc *XController) getAppWrapper(namespace string, name string, caller string) (*arbv1.AppWrapper, error) { + klog.V(4).Infof("[getAppWrapper] geting a copy of '%s/%s' when called by '%s'.", namespace, name, caller) + apiCacheAWJob, err := cc.queueJobLister.AppWrappers(namespace).Get(name) // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level if err != nil { - return err - } - // make sure qj has the latest information - if larger(apiCacheAWJob.ResourceVersion, appwrapper.ResourceVersion) { - klog.V(4).Infof("[refreshAppWrapper] '%s/%s' found more recent copy from cache with status=%+v", appwrapper.Namespace, appwrapper.Name, appwrapper.Status) - klog.V(4).Infof("[refreshAppWrapper] '%s/%s' found more recent copy from cache with status=%+v", apiCacheAWJob.Namespace, apiCacheAWJob.Name, apiCacheAWJob.Status) - apiCacheAWJob.DeepCopyInto(appwrapper) + klog.Errorf("[getAppWrapper] geting a copy of '%s/%s' failed, when called by '%s', err=%v", namespace, name, caller, err) + return nil, err } - klog.V(4).Infof("[refreshAppWrapper] refresh of '%s/%s' succeeded", appwrapper.Namespace, appwrapper.Name) - return nil + klog.V(4).Infof("[getAppWrapper] get a copy of '%s/%s' suceeded when called by '%s'", namespace, name, caller) + return apiCacheAWJob.DeepCopy(), nil } -func (cc *XController) getAppWrapper(namespace string, name string, targetAppwrapper *arbv1.AppWrapper) error { - klog.V(4).Infof("[getAppWrapper] geting a copy of '%s/%s'", namespace, name) - apiCacheAWJob, err := cc.queueJobLister.AppWrappers(namespace).Get(name) - // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level - if err != nil { - klog.V(4).Infof("[getAppWrapper] geting a copy of '%s/%s' failed", namespace, name) - return err + +type EtcdErrorClassifier struct { +} + +func (c *EtcdErrorClassifier) Classify(err error) retrier.Action { + if err == nil { + return retrier.Succeed + } else if apiErrors.IsConflict(err) { + return retrier.Retry + } else { + return retrier.Fail } - apiCacheAWJob.DeepCopyInto(targetAppwrapper) - klog.V(4).Infof("[getAppWrapper] geting a copy of '%s/%s' suceeded", namespace, name) - return nil } diff --git a/test/yaml/0003-aw-job-no-quota.yaml b/test/yaml/0003-aw-job-no-quota.yaml new file mode 100644 index 000000000..a0f8452c3 --- /dev/null +++ b/test/yaml/0003-aw-job-no-quota.yaml @@ -0,0 +1,60 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: my-no-quota-job-0003 + namespace: test +spec: + schedulingSpec: + minAvailable: 1 + resources: + GenericItems: + - replicas: 1 + completionstatus: Complete + custompodresources: 
+ - replicas: 1 + requests: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + generictemplate: + apiVersion: batch/v1 + kind: Job + metadata: + name: my-no-quota-job-0003 + namespace: test + labels: + appwrapper.mcad.ibm.com: my-no-quota-job-0003 + spec: + parallelism: 1 + completions: 1 + template: + metadata: + name: my-no-quota-job-0003 + namespace: test + labels: + appwrapper.mcad.ibm.com: my-no-quota-job-0003 + spec: + terminationGracePeriodSeconds: 1 + restartPolicy: Never + containers: + - name: ubuntu + image: ubuntu:latest + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + sleep 30 + resources: + requests: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi diff --git a/test/yaml/0004-aw-large-job-no-quota.yaml b/test/yaml/0004-aw-large-job-no-quota.yaml new file mode 100644 index 000000000..30cf44efd --- /dev/null +++ b/test/yaml/0004-aw-large-job-no-quota.yaml @@ -0,0 +1,60 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: large-job-no-quota + namespace: test +spec: + schedulingSpec: + minAvailable: 1 + resources: + GenericItems: + - replicas: 1 + completionstatus: Complete + custompodresources: + - replicas: 1 + requests: + cpu: 3000m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 3000m + nvidia.com/gpu: 0 + memory: 300Mi + generictemplate: + apiVersion: batch/v1 + kind: Job + metadata: + name: large-job-no-quota + namespace: test + labels: + appwrapper.mcad.ibm.com: large-job-no-quota + spec: + parallelism: 1 + completions: 1 + template: + metadata: + name: large-job-no-quota + namespace: test + labels: + appwrapper.mcad.ibm.com: large-job-no-quota + spec: + terminationGracePeriodSeconds: 1 + restartPolicy: Never + containers: + - name: ubuntu + image: ubuntu:latest + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + sleep 30 + resources: + requests: + cpu: 3000m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 3000m + nvidia.com/gpu: 0 + memory: 300Mi diff --git a/test/yaml/0005-aw-two-quota-jobs.yaml b/test/yaml/0005-aw-two-quota-jobs.yaml new file mode 100644 index 000000000..36d35a6b1 --- /dev/null +++ b/test/yaml/0005-aw-two-quota-jobs.yaml @@ -0,0 +1,127 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: bronze-job-0005-01 + namespace: test + labels: + quota_context: "bronze" + quota_service: "default" +spec: + schedulingSpec: + minAvailable: 1 + resources: + GenericItems: + - replicas: 1 + completionstatus: Complete + custompodresources: + - replicas: 1 + requests: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + generictemplate: + apiVersion: batch/v1 + kind: Job + metadata: + name: bronze-job-0005-01 + namespace: test + labels: + appwrapper.mcad.ibm.com: bronze-job-0005-01 + spec: + parallelism: 1 + completions: 1 + template: + metadata: + name: bronze-job-0005-01 + namespace: test + labels: + appwrapper.mcad.ibm.com: bronze-job-0005-01 + spec: + terminationGracePeriodSeconds: 1 + restartPolicy: Never + containers: + - name: ubuntu + image: ubuntu:latest + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + sleep 30 + resources: + requests: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi +--- +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: bronze-job-0005-02 + namespace: test + labels: + quota_context: "bronze" + quota_service: 
"default" +spec: + schedulingSpec: + minAvailable: 1 + resources: + GenericItems: + - replicas: 1 + completionstatus: Complete + custompodresources: + - replicas: 1 + requests: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + generictemplate: + apiVersion: batch/v1 + kind: Job + metadata: + name: bronze-job-0005-02 + namespace: test + labels: + appwrapper.mcad.ibm.com: bronze-job-0005-02 + spec: + parallelism: 1 + completions: 1 + template: + metadata: + name: bronze-job-0005-02 + namespace: test + labels: + appwrapper.mcad.ibm.com: bronze-job-0005-02 + spec: + terminationGracePeriodSeconds: 1 + restartPolicy: Never + containers: + - name: ubuntu + image: ubuntu:latest + imagePullPolicy: IfNotPresent + command: + - sh + - -c + - | + sleep 30 + resources: + requests: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 900m + nvidia.com/gpu: 0 + memory: 300Mi From 24709ee449bf1d493f013676f8e1c8fa79aaecf5 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Fri, 23 Jun 2023 11:50:58 +0300 Subject: [PATCH 03/23] Fix small race condition --- pkg/controller/queuejob/active_appwrapper.go | 54 ------------------- .../queuejob/active_appwrapper_test.go | 43 --------------- .../queuejob/queuejob_controller_ex.go | 50 ++++++++++++++--- 3 files changed, 42 insertions(+), 105 deletions(-) delete mode 100644 pkg/controller/queuejob/active_appwrapper.go delete mode 100644 pkg/controller/queuejob/active_appwrapper_test.go diff --git a/pkg/controller/queuejob/active_appwrapper.go b/pkg/controller/queuejob/active_appwrapper.go deleted file mode 100644 index 1aabf70b9..000000000 --- a/pkg/controller/queuejob/active_appwrapper.go +++ /dev/null @@ -1,54 +0,0 @@ -package queuejob - -import ( - "strings" - "sync" - - arbv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1" -) - -// ActiveAppWrapper is current scheduling AppWrapper in the XController struct. 
-// Its sole purpose is provide a thread safe way to for use the XController logic -type ActiveAppWrapper struct { - activeAW *arbv1.AppWrapper - activeAWMutex *sync.RWMutex -} - -// NewActiveAppWrapper -func NewActiveAppWrapper() *ActiveAppWrapper { - return &ActiveAppWrapper{ - activeAW: nil, - activeAWMutex: &sync.RWMutex{}, - } -} - -// AtomicSet as is name implies, atomically sets the activeAW to the new value -func (aw *ActiveAppWrapper) AtomicSet(newValue *arbv1.AppWrapper) { - aw.activeAWMutex.Lock() - defer aw.activeAWMutex.Unlock() - aw.activeAW = newValue -} - -// IsActiveAppWrapper safely performs the comparison that was done inside the if block -// at line 1977 in the queuejob_controller_ex.go -// The code looked like this: -// -// if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && -// !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { -// // One more check to ensure AW is not the current active schedule object -// if cc.schedulingAW == nil || -// (strings.Compare(cc.schedulingAW.Namespace, qj.Namespace) != 0 && -// strings.Compare(cc.schedulingAW.Name, qj.Name) != 0) { -// cc.qjqueue.AddIfNotPresent(qj) -// klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper %s%s - added to active queue, Status=%+v", -// qj.Namespace, qj.Name, qj.Status) -// return nil -// } -// } -func (aw *ActiveAppWrapper) IsActiveAppWrapper(name, namespace string) bool { - aw.activeAWMutex.RLock() - defer aw.activeAWMutex.RUnlock() - return aw.activeAW == nil || - (strings.Compare(aw.activeAW.Namespace, namespace) != 0 && - strings.Compare(aw.activeAW.Name, name) != 0) -} diff --git a/pkg/controller/queuejob/active_appwrapper_test.go b/pkg/controller/queuejob/active_appwrapper_test.go deleted file mode 100644 index 250ac1798..000000000 --- a/pkg/controller/queuejob/active_appwrapper_test.go +++ /dev/null @@ -1,43 +0,0 @@ -package queuejob_test - -import ( - "github.com/onsi/ginkgo" - "github.com/onsi/gomega" - arbv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1" - "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/queuejob" - v1 "k8s.io/apimachinery/pkg/apis/meta/v1" -) - -var _ = ginkgo.Describe("Active App Wrapper Tests", func() { - var activeAppWrapper *queuejob.ActiveAppWrapper - ginkgo.When("Checking if is active app wrapper", func() { - ginkgo.BeforeEach(func() { - activeAppWrapper = queuejob.NewActiveAppWrapper() - }) - ginkgo.It("should return 'true' for a nil active app wrapper", func() { - gomega.Expect(activeAppWrapper.IsActiveAppWrapper("an-appwrapper-name", "unit-test-namespace"), - gomega.BeTrue()) - }) - ginkgo.It("should return 'true' for a the same app wrapper name and namespace", func() { - activeAppWrapper.AtomicSet(&arbv1.AppWrapper{ - ObjectMeta: v1.ObjectMeta{ - Name: "an-appwrapper-name", - Namespace: "unit-test-namespace", - }, - }) - gomega.Expect(activeAppWrapper.IsActiveAppWrapper("an-appwrapper-name", "unit-test-namespace"), - gomega.BeTrue()) - }) - ginkgo.It("should return 'false' for a the same app wrapper name and namespace", func() { - activeAppWrapper.AtomicSet(&arbv1.AppWrapper{ - ObjectMeta: v1.ObjectMeta{ - Name: "an-appwrapper-name", - Namespace: "unit-test-namespace", - }, - }) - gomega.Expect(activeAppWrapper.IsActiveAppWrapper("another-appwrapper-name", "other-unit-test-namespace"), - gomega.BeTrue()) - }) - }) - -}) diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 
3d6cf5919..4ed46ede6 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -36,6 +36,7 @@ import ( "math/rand" "reflect" "runtime/debug" + "sync" // "runtime/debug" "sort" @@ -171,7 +172,8 @@ type XController struct { quotaManager quota.QuotaManagerInterface // Active Scheduling AppWrapper - schedulingAW *ActiveAppWrapper + schedulingAW *arbv1.AppWrapper + schedulingMutex sync.RWMutex } type JobAndClusterAgent struct { @@ -230,7 +232,7 @@ func NewJobController(config *rest.Config, serverOption *options.ServerOption) * updateQueue: cache.NewFIFO(GetQueueJobKey), qjqueue: NewSchedulingQueue(), cache: clusterstatecache.New(config), - schedulingAW: NewActiveAppWrapper(), + schedulingAW: nil, } cc.metricsAdapter = adapter.New(serverOption, config, cc.cache) @@ -1103,15 +1105,17 @@ func (qjm *XController) ScheduleNext() { // check if we have enough compute resources for it // if we have enough compute resources then we set the AllocatedReplicas to the total // amount of resources asked by the job + qjm.schedulingMutex.Lock() qj, err := qjm.qjqueue.Pop() if err != nil { klog.Errorf("[ScheduleNext] Cannot pop QueueJob from qjqueue! err=%#v", err) return // Try to pop qjqueue again } - qjm.schedulingAW.AtomicSet(qj) + qjm.schedulingAW = qj + qjm.schedulingMutex.Unlock() // ensure that current active appwrapper is reset at the end of this function, to prevent // the appwrapper from being added in synch job - defer qjm.schedulingAW.AtomicSet(nil) + defer qjm.schedulingAWAtomicSet(nil) scheduleNextRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) scheduleNextRetrier.SetJitter(0.05) @@ -1183,7 +1187,7 @@ func (qjm *XController) ScheduleNext() { return nil } apiCacheAWJob.DeepCopyInto(qj) - qjm.schedulingAW.AtomicSet(qj) + qjm.schedulingAWAtomicSet(qj) } qj.Status.QueueJobState = arbv1.AppWrapperCondHeadOfLine @@ -1928,8 +1932,8 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { // make sure qj has the latest information if larger(cacheAWJob.ResourceVersion, qj.ResourceVersion) { - klog.V(4).Infof("[syncQueueJob] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) - klog.V(4).Infof("[syncQueueJobJ] '%s/%s' found more recent copy from cache &cacheAWJob=%p cacheAWJob=%+v", cacheAWJob.Namespace, cacheAWJob.Name, cacheAWJob, cacheAWJob) + klog.V(5).Infof("[syncQueueJob] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) + klog.V(5).Infof("[syncQueueJobJ] '%s/%s' found more recent copy from cache &cacheAWJob=%p cacheAWJob=%+v", cacheAWJob.Namespace, cacheAWJob.Name, cacheAWJob, cacheAWJob) cacheAWJob.DeepCopyInto(qj) } @@ -2079,7 +2083,7 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool //FIXME there is still are race condition here. 
if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { // One more check to ensure AW is not the current active schedule object - if cc.schedulingAW.IsActiveAppWrapper(qj.Name, qj.Namespace) { + if cc.IsActiveAppWrapper(qj.Name, qj.Namespace) { cc.qjqueue.AddIfNotPresent(qj) klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper '%s/%s' - added to active queue, Status=%+v", qj.Namespace, qj.Name, qj.Status) @@ -2395,3 +2399,33 @@ func (c *EtcdErrorClassifier) Classify(err error) retrier.Action { return retrier.Fail } } + +// IsActiveAppWrapper safely performs the comparison that was done inside the if block +// at line 1977 in the queuejob_controller_ex.go +// The code looked like this: +// +// if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && +// !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { +// // One more check to ensure AW is not the current active schedule object +// if cc.schedulingAW == nil || +// (strings.Compare(cc.schedulingAW.Namespace, qj.Namespace) != 0 && +// strings.Compare(cc.schedulingAW.Name, qj.Name) != 0) { +// cc.qjqueue.AddIfNotPresent(qj) +// klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper %s%s - added to active queue, Status=%+v", +// qj.Namespace, qj.Name, qj.Status) +// return nil +// } +// } + +func (cc *XController) IsActiveAppWrapper(name, namespace string) bool { + cc.schedulingMutex.RLock() + defer cc.schedulingMutex.RUnlock() + return cc.schedulingAW == nil || + (strings.Compare(cc.schedulingAW.Namespace, namespace) != 0 && + strings.Compare(cc.schedulingAW.Name, name) != 0) +} +func (qjm *XController) schedulingAWAtomicSet(qj *arbv1.AppWrapper) { + qjm.schedulingMutex.Lock() + qjm.schedulingAW = qj + qjm.schedulingMutex.Unlock() +} From 4f9f949354afb3614a75167eb8775f9ec2ae9e45 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:16:19 +0300 Subject: [PATCH 04/23] Preemption code updates. --- .../queuejob/queuejob_controller_ex.go | 55 +++++++++++++------ 1 file changed, 39 insertions(+), 16 deletions(-) diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 4ed46ede6..76919fdb7 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -440,8 +440,11 @@ func (qjm *XController) PreemptQueueJobs() { var updateNewJob *arbv1.AppWrapper var message string - newjob, e := qjm.queueJobLister.AppWrappers(aw.Namespace).Get(aw.Name) - if e != nil { + newjob, err := qjm.getAppWrapper(aw.Namespace, aw.Name, "[PreemptQueueJobs] get fresh appwrapper") + if err != nil { + if !apiErrors.IsNotFound(err) { + klog.Warningf("[PreemptQueueJobs] failed in retriving a fresh copy of the appwrapper '%s/%s', %v. Will try to preempt on the next run", aw.Namespace, aw.Name, err) + } continue } newjob.Status.CanRun = false @@ -463,8 +466,11 @@ func (qjm *XController) PreemptQueueJobs() { newjob.Status.QueueJobState = arbv1.AppWrapperCondFailed newjob.Status.Running = 0 updateNewJob = newjob.DeepCopy() + if err := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { - klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", aw.Namespace, aw.Name, err) + if !apiErrors.IsNotFound(err) { + klog.Warningf("[PreemptQueueJobs] to update status of AppWrapper %s/%s: err %v. 
Will try to preempt on the next run", aw.Namespace, aw.Name, err) + } } //cannot use cleanup AW, since it puts AW back in running state go qjm.qjqueue.AddUnschedulableIfNotPresent(aw) @@ -522,7 +528,9 @@ func (qjm *XController) PreemptQueueJobs() { updateNewJob = newjob.DeepCopy() } if err := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { - klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", aw.Namespace, aw.Name, err) + if !apiErrors.IsNotFound(err) { + klog.Warningf("[PreemptQueueJobs] to update status of AppWrapper %s/%s: err %v. Will try to preempt on the next run", aw.Namespace, aw.Name, err) + } } if cleanAppWrapper { klog.V(4).Infof("[PreemptQueueJobs] Deleting AppWrapper %s/%s due to maximum number of requeuings exceeded.", aw.Name, aw.Namespace) @@ -543,16 +551,31 @@ func (qjm *XController) preemptAWJobs(preemptAWs []*arbv1.AppWrapper) { } for _, aw := range preemptAWs { - apiCacheAWJob, e := qjm.queueJobLister.AppWrappers(aw.Namespace).Get(aw.Name) - if e != nil { - klog.Errorf("[preemptQWJobs] Failed to get AppWrapper to from API Cache %v/%v: %v", - aw.Namespace, aw.Name, e) - continue - } - apiCacheAWJob.Status.CanRun = false - if err := qjm.updateStatusInEtcd(apiCacheAWJob, "preemptAWJobs - CanRun: false"); err != nil { - klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", - apiCacheAWJob.Namespace, apiCacheAWJob.Name, err) + preemptRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) + preemptRetrier.SetJitter(0.05) + err := preemptRetrier.Run(func() error { + apiCacheAWJob, retryErr := qjm.getAppWrapper(aw.Namespace, aw.Name, "[preemptAWJobs] get fresh app wrapper") + if retryErr != nil { + if apiErrors.IsNotFound(retryErr) { + return nil + } + + klog.Errorf("[preemptAWJobs] Failed to get AppWrapper to from API Cache %v/%v: %v", + aw.Namespace, aw.Name, retryErr) + return retryErr + } + apiCacheAWJob.Status.CanRun = false + if retryErr := qjm.updateStatusInEtcd(apiCacheAWJob, "preemptAWJobs - CanRun: false"); retryErr != nil { + klog.Warningf("[preemptAWJobs] Failed to update status of AppWrapper %v/%v: %v. Retrying", + apiCacheAWJob.Namespace, apiCacheAWJob.Name, retryErr) + return retryErr + } + return nil + }) + if err != nil { + if !apiErrors.IsNotFound(err) { + klog.Warningf("[preemptAWJobs] Failed to preempt a ppWrapper %s/%s: err %v.", aw.Namespace, aw.Name, err) + } } } } @@ -707,9 +730,9 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb } } if len(name) == 0 { - klog.Warningf("[getAppWrapperCompletionStatus] object name not present for appwrapper: %v in namespace: %v", caw.Name, caw.Namespace) + klog.Warningf("[getAppWrapperCompletionStatus] object name not present for appwrapper: %s in namespace: %s", caw.Name, caw.Namespace) } - klog.Infof("[getAppWrapperCompletionStatus] Checking items completed for appwrapper: %v in namespace: %v", caw.Name, caw.Namespace) + klog.Infof("[getAppWrapperCompletionStatus] Checking items completed for appwrapper: %s in namespace: %s", caw.Name, caw.Namespace) status := qjm.genericresources.IsItemCompleted(&genericItem, caw.Namespace, caw.Name, name) if !status { From cc5c3371600eab16ef7a675dace8bd86e7a077d5 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Fri, 23 Jun 2023 13:23:58 +0300 Subject: [PATCH 05/23] Small locking issue fixed. 
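
The locking issue: patch 03 made ScheduleNext() take schedulingMutex before popping from qjqueue and release it after recording the popped AppWrapper, but the early-return path taken when Pop() fails kept the mutex held, so later callers of ScheduleNext() and IsActiveAppWrapper() would block on it. A minimal sketch of the corrected flow, mirroring the controller code changed below (illustrative only, not the full function):

    qjm.schedulingMutex.Lock()
    qj, err := qjm.qjqueue.Pop()
    if err != nil {
        klog.Errorf("[ScheduleNext] Cannot pop QueueJob from qjqueue! err=%#v", err)
        qjm.schedulingMutex.Unlock() // release the lock before the early return
        return // Try to pop qjqueue again
    }
    qjm.schedulingAW = qj
    qjm.schedulingMutex.Unlock()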
--- pkg/controller/queuejob/queuejob_controller_ex.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 76919fdb7..5783aef83 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -1132,6 +1132,7 @@ func (qjm *XController) ScheduleNext() { qj, err := qjm.qjqueue.Pop() if err != nil { klog.Errorf("[ScheduleNext] Cannot pop QueueJob from qjqueue! err=%#v", err) + qjm.schedulingMutex.Unlock() return // Try to pop qjqueue again } qjm.schedulingAW = qj From feafd151c962ee2ed84c024197b69a92ba4cc085 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Tue, 27 Jun 2023 15:31:27 +0300 Subject: [PATCH 06/23] Fixed failed test. --- hack/run-e2e-kind.sh | 26 +- pkg/apis/controller/v1beta1/appwrapper.go | 8 +- .../queuejob/queuejob_controller_ex.go | 240 ++++++++++-------- test/e2e-kuttl/quota-forest/99-cleanup.yaml | 2 +- test/e2e/queue.go | 16 +- test/yaml/0006-aw-init-containers.yaml | 86 +++++++ 6 files changed, 238 insertions(+), 140 deletions(-) create mode 100644 test/yaml/0006-aw-init-containers.yaml diff --git a/hack/run-e2e-kind.sh b/hack/run-e2e-kind.sh index 8d5596329..35646070c 100755 --- a/hack/run-e2e-kind.sh +++ b/hack/run-e2e-kind.sh @@ -34,6 +34,7 @@ export CLUSTER_CONTEXT="--name test" export IMAGE_ECHOSERVER="kicbase/echo-server:1.0" export IMAGE_UBUNTU_LATEST="ubuntu:latest" export IMAGE_UBI_LATEST="registry.access.redhat.com/ubi8/ubi:latest" +export IMAGE_BUSY_BOX_LATEST="k8s.gcr.io/busybox:latest" export KIND_OPT=${KIND_OPT:=" --config ${ROOT_DIR}/hack/e2e-kind-config.yaml"} export KA_BIN=_output/bin export WAIT_TIME="20s" @@ -207,27 +208,20 @@ function kind-up-cluster { exit 1 fi - docker pull ${IMAGE_ECHOSERVER} - if [ $? -ne 0 ] - then - echo "Failed to pull ${IMAGE_ECHOSERVER}" - exit 1 - fi - - docker pull ${IMAGE_UBUNTU_LATEST} + docker pull ${IMAGE_UBI_LATEST} if [ $? -ne 0 ] then - echo "Failed to pull ${IMAGE_UBUNTU_LATEST}" + echo "Failed to pull ${IMAGE_UBI_LATEST}" exit 1 fi - - docker pull ${IMAGE_UBI_LATEST} + + docker pull ${IMAGE_BUSY_BOX_LATEST} if [ $? -ne 0 ] then - echo "Failed to pull ${IMAGE_UBI_LATEST}" + echo "Failed to pull ${IMAGE_BUSY_BOX_LATEST}" exit 1 fi - + if [[ "$MCAD_IMAGE_PULL_POLICY" = "Always" ]] then docker pull ${IMAGE_MCAD} @@ -244,7 +238,7 @@ function kind-up-cluster { fi docker images - for image in ${IMAGE_ECHOSERVER} ${IMAGE_UBUNTU_LATEST} ${IMAGE_MCAD} ${IMAGE_UBI_LATEST} + for image in ${IMAGE_ECHOSERVER} ${IMAGE_UBUNTU_LATEST} ${IMAGE_MCAD} ${IMAGE_UBI_LATEST} ${IMAGE_BUSY_BOX_LATEST} do kind load docker-image ${image} ${CLUSTER_CONTEXT} if [ $? -ne 0 ] @@ -330,8 +324,6 @@ function mcad-quota-management-down { echo "Failed to undeploy controller" exit 1 fi - echo "Waiting for the test namespace to be cleaned up.." 
- sleep 60 } function mcad-up { @@ -402,4 +394,4 @@ setup-mcad-env kuttl-tests mcad-quota-management-down mcad-up -go test ./test/e2e -v -timeout 120m -count=1 \ No newline at end of file +go test ./test/e2e -v -timeout 120m -count=1 diff --git a/pkg/apis/controller/v1beta1/appwrapper.go b/pkg/apis/controller/v1beta1/appwrapper.go index f5302b3d7..e4dcf0fc8 100644 --- a/pkg/apis/controller/v1beta1/appwrapper.go +++ b/pkg/apis/controller/v1beta1/appwrapper.go @@ -101,7 +101,7 @@ type AppWrapperService struct { } // AppWrapperResource is App Wrapper aggregation resource -//todo: To be depricated +// todo: To be depricated type AppWrapperResource struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` @@ -263,11 +263,15 @@ type AppWrapperStatus struct { // Represents the latest available observations of pods under appwrapper PendingPodConditions []PendingPodSpec `json:"pendingpodconditions"` + + // Re-queueing state fields + RequeueingTimeInSeconds int `json:"requeueing-time-seconds,omitempty"` + NumberOfRequeueings int `json:"number-of-requeueings,omitempty"` } type AppWrapperState string -//enqueued, active, deleting, succeeded, failed +// enqueued, active, deleting, succeeded, failed const ( AppWrapperStateEnqueued AppWrapperState = "Pending" AppWrapperStateActive AppWrapperState = "Running" diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 5783aef83..44ab1980d 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -36,12 +36,10 @@ import ( "math/rand" "reflect" "runtime/debug" - "sync" - - // "runtime/debug" "sort" "strconv" "strings" + "sync" "time" "github.com/eapache/go-resiliency/retrier" @@ -438,109 +436,128 @@ func (qjm *XController) PreemptQueueJobs() { continue } - var updateNewJob *arbv1.AppWrapper - var message string - newjob, err := qjm.getAppWrapper(aw.Namespace, aw.Name, "[PreemptQueueJobs] get fresh appwrapper") - if err != nil { - if !apiErrors.IsNotFound(err) { - klog.Warningf("[PreemptQueueJobs] failed in retriving a fresh copy of the appwrapper '%s/%s', %v. Will try to preempt on the next run", aw.Namespace, aw.Name, err) + preemptRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) + preemptRetrier.SetJitter(0.05) + err := preemptRetrier.Run(func() error { + var updateNewJob *arbv1.AppWrapper + var message string + newjob, retryErr := qjm.getAppWrapper(aw.Namespace, aw.Name, "[PreemptQueueJobs] get fresh appwrapper") + if retryErr != nil { + if apiErrors.IsNotFound(retryErr) { + return nil + } + klog.Warningf("[PreemptQueueJobs] failed in retriving a fresh copy of the appwrapper '%s/%s', %v. Will try to preempt on the next run", aw.Namespace, aw.Name, retryErr) + } + newjob.Status.CanRun = false + newjob.Status.FilterIgnore = true // update QueueJobState only + cleanAppWrapper := false + //If dispatch deadline is exceeded no matter what the state of AW, kill the job and set status as Failed. + if (aw.Status.State == arbv1.AppWrapperStateActive) && (aw.Spec.SchedSpec.DispatchDuration.Limit > 0) { + if aw.Spec.SchedSpec.DispatchDuration.Overrun { + index := getIndexOfMatchedCondition(aw, arbv1.AppWrapperCondPreemptCandidate, "DispatchDeadlineExceeded") + if index < 0 { + message = fmt.Sprintf("Dispatch deadline exceeded. 
allowed to run for %v seconds", aw.Spec.SchedSpec.DispatchDuration.Limit) + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "DispatchDeadlineExceeded", message) + newjob.Status.Conditions = append(newjob.Status.Conditions, cond) + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "DispatchDeadlineExceeded", "") + newjob.Status.Conditions[index] = *cond.DeepCopy() + } + //should the AW state be set in this method?? + newjob.Status.State = arbv1.AppWrapperStateFailed + newjob.Status.QueueJobState = arbv1.AppWrapperCondFailed + newjob.Status.Running = 0 + updateNewJob = newjob.DeepCopy() + + // if retryErr := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- DispatchDeadlineExceeded"); retryErr != nil { + updatedAW, retryErr := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- DispatchDeadlineExceeded") + if retryErr != nil { + if apiErrors.IsNotFound(retryErr) { + return nil + } + klog.Warningf("[PreemptQueueJobs] status update for '%s/%s' failed, err=%v. Retrying", aw.Namespace, aw.Name, retryErr) + return retryErr + } + //cannot use cleanup AW, since it puts AW back in running state + qjm.qjqueue.AddUnschedulableIfNotPresent(updatedAW) + + //Move to next AW + return nil + } } - continue - } - newjob.Status.CanRun = false - cleanAppWrapper := false - //If dispatch deadline is exceeded no matter what the state of AW, kill the job and set status as Failed. - if (aw.Status.State == arbv1.AppWrapperStateActive) && (aw.Spec.SchedSpec.DispatchDuration.Limit > 0) { - if aw.Spec.SchedSpec.DispatchDuration.Overrun { - index := getIndexOfMatchedCondition(aw, arbv1.AppWrapperCondPreemptCandidate, "DispatchDeadlineExceeded") + + if ((aw.Status.Running + aw.Status.Succeeded) < int32(aw.Spec.SchedSpec.MinAvailable)) && aw.Status.State == arbv1.AppWrapperStateActive { + index := getIndexOfMatchedCondition(aw, arbv1.AppWrapperCondPreemptCandidate, "MinPodsNotRunning") if index < 0 { - message = fmt.Sprintf("Dispatch deadline exceeded. allowed to run for %v seconds", aw.Spec.SchedSpec.DispatchDuration.Limit) - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "DispatchDeadlineExceeded", message) + message = fmt.Sprintf("Insufficient number of Running and Completed pods, minimum=%d, running=%d, completed=%d.", aw.Spec.SchedSpec.MinAvailable, aw.Status.Running, aw.Status.Succeeded) + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "MinPodsNotRunning", message) newjob.Status.Conditions = append(newjob.Status.Conditions, cond) } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "DispatchDeadlineExceeded", "") + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "MinPodsNotRunning", "") newjob.Status.Conditions[index] = *cond.DeepCopy() } - //should the AW state be set in this method?? - newjob.Status.State = arbv1.AppWrapperStateFailed - newjob.Status.QueueJobState = arbv1.AppWrapperCondFailed - newjob.Status.Running = 0 - updateNewJob = newjob.DeepCopy() - if err := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { - if !apiErrors.IsNotFound(err) { - klog.Warningf("[PreemptQueueJobs] to update status of AppWrapper %s/%s: err %v. 
Will try to preempt on the next run", aw.Namespace, aw.Name, err) + if aw.Spec.SchedSpec.Requeuing.GrowthType == "exponential" { + newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.TimeInSeconds + } else if aw.Spec.SchedSpec.Requeuing.GrowthType == "linear" { + newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds + } + + if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds > 0 { + if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds <= newjob.Status.RequeueingTimeInSeconds { + newjob.Status.RequeueingTimeInSeconds = aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds } } - //cannot use cleanup AW, since it puts AW back in running state - go qjm.qjqueue.AddUnschedulableIfNotPresent(aw) - //Move to next AW - continue - } - } + if newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings > 0 && newjob.Status.NumberOfRequeueings == newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings { + newjob.Status.State = arbv1.AppWrapperStateDeleted + cleanAppWrapper = true + } else { + newjob.Status.NumberOfRequeueings += 1 + } - if ((aw.Status.Running + aw.Status.Succeeded) < int32(aw.Spec.SchedSpec.MinAvailable)) && aw.Status.State == arbv1.AppWrapperStateActive { - index := getIndexOfMatchedCondition(aw, arbv1.AppWrapperCondPreemptCandidate, "MinPodsNotRunning") - if index < 0 { - message = fmt.Sprintf("Insufficient number of Running and Completed pods, minimum=%d, running=%d, completed=%d.", aw.Spec.SchedSpec.MinAvailable, aw.Status.Running, aw.Status.Succeeded) - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "MinPodsNotRunning", message) - newjob.Status.Conditions = append(newjob.Status.Conditions, cond) + updateNewJob = newjob.DeepCopy() } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "MinPodsNotRunning", "") - newjob.Status.Conditions[index] = *cond.DeepCopy() - } + //If pods failed scheduling generate new preempt condition + message = fmt.Sprintf("Pods failed scheduling failed=%v, running=%v.", len(aw.Status.PendingPodConditions), aw.Status.Running) + index := getIndexOfMatchedCondition(newjob, arbv1.AppWrapperCondPreemptCandidate, "PodsFailedScheduling") + //ignore co-scheduler failed scheduling events. This is a temp + //work around until co-scheduler version 0.22.X perf issues are resolved. 
+ if index < 0 { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "PodsFailedScheduling", message) + newjob.Status.Conditions = append(newjob.Status.Conditions, cond) + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "PodsFailedScheduling", message) + newjob.Status.Conditions[index] = *cond.DeepCopy() + } - if aw.Spec.SchedSpec.Requeuing.GrowthType == "exponential" { - newjob.Spec.SchedSpec.Requeuing.TimeInSeconds += aw.Spec.SchedSpec.Requeuing.TimeInSeconds - } else if aw.Spec.SchedSpec.Requeuing.GrowthType == "linear" { - newjob.Spec.SchedSpec.Requeuing.TimeInSeconds += aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds + updateNewJob = newjob.DeepCopy() } - if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds > 0 { - if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds <= newjob.Spec.SchedSpec.Requeuing.TimeInSeconds { - newjob.Spec.SchedSpec.Requeuing.TimeInSeconds = aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds + retryErr = qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- MinPodsNotRunning") + if retryErr != nil { + if apiErrors.IsNotFound(retryErr) { + return nil } + klog.Warningf("[PreemptQueueJobs] update of spec and status for '%s/%s' failed, err=%v. Will try to preempt on the next run", aw.Namespace, aw.Name, retryErr) + return retryErr } - if newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings > 0 && newjob.Spec.SchedSpec.Requeuing.NumRequeuings == newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings { - newjob.Status.State = arbv1.AppWrapperStateDeleted - cleanAppWrapper = true - } else { - newjob.Spec.SchedSpec.Requeuing.NumRequeuings += 1 - } - - updateNewJob = newjob.DeepCopy() - } else { - //If pods failed scheduling generate new preempt condition - message = fmt.Sprintf("Pods failed scheduling failed=%v, running=%v.", len(aw.Status.PendingPodConditions), aw.Status.Running) - index := getIndexOfMatchedCondition(newjob, arbv1.AppWrapperCondPreemptCandidate, "PodsFailedScheduling") - //ignore co-scheduler failed scheduling events. This is a temp - //work around until co-scheduler version 0.22.X perf issues are resolved. - if index < 0 { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "PodsFailedScheduling", message) - newjob.Status.Conditions = append(newjob.Status.Conditions, cond) + if cleanAppWrapper { + klog.V(4).Infof("[PreemptQueueJobs] Deleting AppWrapper %s/%s due to maximum number of requeuings exceeded.", aw.Name, aw.Namespace) + go qjm.Cleanup(updateNewJob) } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "PodsFailedScheduling", message) - newjob.Status.Conditions[index] = *cond.DeepCopy() - } - - updateNewJob = newjob.DeepCopy() - } - if err := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { - if !apiErrors.IsNotFound(err) { - klog.Warningf("[PreemptQueueJobs] to update status of AppWrapper %s/%s: err %v. 
Will try to preempt on the next run", aw.Namespace, aw.Name, err) - } - } - if cleanAppWrapper { - klog.V(4).Infof("[PreemptQueueJobs] Deleting AppWrapper %s/%s due to maximum number of requeuings exceeded.", aw.Name, aw.Namespace) - go qjm.Cleanup(aw) - } else { - klog.V(4).Infof("[PreemptQueueJobs] Adding preempted AppWrapper %s/%s to backoff queue.", aw.Name, aw.Namespace) - //Only back-off AWs that are in state running and not in state Failed - if updateNewJob.Status.State != arbv1.AppWrapperStateFailed { - go qjm.backoff(aw, "PreemptionTriggered", string(message)) + //Only back-off AWs that are in state running and not in state Failed + if updateNewJob.Status.State != arbv1.AppWrapperStateFailed { + klog.V(4).Infof("[PreemptQueueJobs] Adding preempted AppWrapper %s/%s to backoff queue.", aw.Name, aw.Namespace) + klog.V(4).Infof("[PreemptQueueJobs] AppWrapper '%s/%s' status: CanRun %d", aw.Name, aw.Namespace, aw.Status.CanRun) + go qjm.backoff(updateNewJob, "PreemptionTriggered", string(message)) + } } + return nil + }) + if err != nil { + klog.Infof("[PreemptQueueJobs] failed preemption for app wrapper %s/%s, err= %v. Will attempt on the next run", aw.Name, aw.Namespace, err) } } } @@ -574,7 +591,7 @@ func (qjm *XController) preemptAWJobs(preemptAWs []*arbv1.AppWrapper) { }) if err != nil { if !apiErrors.IsNotFound(err) { - klog.Warningf("[preemptAWJobs] Failed to preempt a ppWrapper %s/%s: err %v.", aw.Namespace, aw.Name, err) + klog.Warningf("[preemptAWJobs] Failed to preempt app Wrapper '%s/%s'. App wrapper is not found", aw.Namespace, aw.Name) } } } @@ -745,7 +762,7 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb } } - klog.V(4).Infof("[getAppWrapperCompletionStatus] countCompletionRequired %v, podsRunning %v, podsPending %v", countCompletionRequired, caw.Status.Running, caw.Status.Pending) + klog.V(4).Infof("[getAppWrapperCompletionStatus] '%s/%s' countCompletionRequired %v, podsRunning %v, podsPending %v", caw.Namespace, caw.Name, countCompletionRequired, caw.Status.Running, caw.Status.Pending) //Set new status only when completion required flag is present in genericitems array if countCompletionRequired > 0 { @@ -1388,6 +1405,10 @@ func (qjm *XController) ScheduleNext() { //Quota was allocated for this appwrapper, release it. 
qjm.quotaManager.Release(qj) } + if apiErrors.IsNotFound(retryErr) { + klog.Warningf("[ScheduleNext] app wrapper '%s/%s' not found skiping dispatch", qj.Namespace, qj.Name) + return nil + } return retryErr } tempAW.DeepCopyInto(qj) @@ -1462,7 +1483,7 @@ func (cc *XController) updateEtcd(currentAppwrapper *arbv1.AppWrapper, caller st if err != nil { return nil, err } - if larger(updatedAppwrapper.ResourceVersion, currentAppwrapper.ResourceVersion) { + if larger(currentAppwrapper.ResourceVersion, updatedAppwrapper.ResourceVersion) { klog.Warningf("[updateEtcd] current app wrapper '%s/%s' called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, currentAppwrapper.ResourceVersion) klog.Warningf("[updateEtcd] updated app wrapper '%s/%s' called by '%s' has version %s", updatedAppwrapper.Namespace, updatedAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) } @@ -1478,11 +1499,11 @@ func (cc *XController) updateStatusInEtcd(currentAppwrapper *arbv1.AppWrapper, c if err != nil { return err } - if larger(updatedAppwrapper.ResourceVersion, currentAppwrapper.ResourceVersion) { + if larger(currentAppwrapper.ResourceVersion, updatedAppwrapper.ResourceVersion) { klog.Warningf("[updateStatusInEtcd] current app wrapper '%s/%s' called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, currentAppwrapper.ResourceVersion) klog.Warningf("[updateStatusInEtcd] updated app wrapper '%s/%s' called by '%s' has version %s", updatedAppwrapper.Namespace, updatedAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) } - klog.V(10).Infof("[updateStatusInEtcd] update success '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + klog.V(4).Infof("[updateStatusInEtcd] update success '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) return nil } @@ -1616,7 +1637,7 @@ func (qjm *XController) backoff(q *arbv1.AppWrapper, reason string, message stri time.Sleep(time.Duration(qjm.serverOption.BackoffTime) * time.Second) qjm.qjqueue.MoveToActiveQueueIfExists(q) - klog.V(3).Infof("[backoff] %s activeQ.Add after sleep for %d seconds. activeQ=%t Unsched=%t &aw=%p Version=%s Status=%+v", q.Name, + klog.V(3).Infof("[backoff] '%s/%s' activeQ Add after sleep for %d seconds. activeQ=%t Unsched=%t &aw=%p Version=%s Status=%+v", q.Namespace, q.Name, qjm.serverOption.BackoffTime, qjm.qjqueue.IfExistActiveQ(q), qjm.qjqueue.IfExistUnschedulableQ(q), q, q.ResourceVersion, q.Status) } @@ -1716,7 +1737,7 @@ func (qjm *XController) UpdateQueueJobs() { if err != nil { klog.Errorf("[UpdateQueueJobs] Fail to enqueue %s to eventQueue, ignore. 
*Delay=%.6f seconds &qj=%p Version=%s Status=%+v err=%#v", newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob, newjob.ResourceVersion, newjob.Status, err) } else { - klog.V(4).Infof("[UpdateQueueJobs] %s *Delay=%.6f seconds eventQueue.Add_byUpdateQueueJobs &qj=%p Version=%s Status=%+v", newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob, newjob.ResourceVersion, newjob.Status) + klog.V(10).Infof("[UpdateQueueJobs] %s *Delay=%.6f seconds eventQueue.Add_byUpdateQueueJobs &qj=%p Version=%s Status=%+v", newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob, newjob.ResourceVersion, newjob.Status) } } } @@ -1762,8 +1783,6 @@ func (cc *XController) updateQueueJob(oldObj, newObj interface{}) { oldQJ, ok := oldObj.(*arbv1.AppWrapper) if !ok { klog.Errorf("[Informer-updateQJ] old object is not AppWrapper. enqueue(newQJ). oldObj=%+v", oldObj) - klog.V(4).Infof("[Informer-updateQJ] %s *Delay=%.6f seconds BadOldObject enqueue &newQJ=%p Version=%s Status=%+v", newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ, newQJ.ResourceVersion, newQJ.Status) - //cc.enqueue(newQJ) return } // AppWrappers may come out of order. Ignore old ones. @@ -1774,10 +1793,13 @@ func (cc *XController) updateQueueJob(oldObj, newObj interface{}) { } if equality.Semantic.DeepEqual(newQJ.Status, oldQJ.Status) { - klog.V(4).Infof("[Informer-updateQJ] No change to status field of AppWrapper: %s, oldAW=%+v, newAW=%+v.", newQJ.Name, oldQJ.Status, newQJ.Status) + klog.Warningf("[Informer-updateQJ] No change to status field of AppWrapper: '%s/%s', oldAW=%+v, newAW=%+v.", newQJ.Namespace, newQJ.Name, oldQJ.Status, newQJ.Status) + //if newQJ.Status.FilterIgnore { + // klog.Warningf("[Informer-updateQJ] Status.FilterIgnore set for AppWrapper: '%s/%s' not enqueing", newQJ.Namespace, newQJ.Name) + //} } - klog.V(4).Infof("[Informer-updateQJ] '%s/%s' *Delay=%.6f seconds normal enqueue Version=%s Status=%+v", newQJ.Namespace, newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ.ResourceVersion, newQJ.Status) + klog.V(4).Infof("[Informer-updateQJ] '%s/%s' *Delay=%.6f seconds normal enqueue Version=%s Status=%v", newQJ.Namespace, newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ.ResourceVersion, newQJ.Status) cc.enqueue(newQJ) } @@ -1944,7 +1966,7 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { if err != nil { // Implicit detection of deletion if apierrors.IsNotFound(err) { - klog.V(4).Infof("[syncQueueJob] AppWrapper %s not found in cache: info=%+v", qj.Name, err) + klog.Warningf("[syncQueueJob] AppWrapper '%s/%s' not found in cache.", qj.Namespace, qj.Name) cc.Cleanup(qj) cc.qjqueue.Delete(qj) return nil @@ -1956,8 +1978,8 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { // make sure qj has the latest information if larger(cacheAWJob.ResourceVersion, qj.ResourceVersion) { - klog.V(5).Infof("[syncQueueJob] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) - klog.V(5).Infof("[syncQueueJobJ] '%s/%s' found more recent copy from cache &cacheAWJob=%p cacheAWJob=%+v", cacheAWJob.Namespace, cacheAWJob.Name, cacheAWJob, cacheAWJob) + klog.V(5).Infof("[syncQueueJob] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) + klog.V(5).Infof("[syncQueueJob] '%s/%s' found more recent copy from cache &cacheAWJob=%p 
cacheAWJob=%+v", cacheAWJob.Namespace, cacheAWJob.Name, cacheAWJob, cacheAWJob) cacheAWJob.DeepCopyInto(qj) } @@ -2058,11 +2080,11 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool // If this the first time seeing this AW, no need to delete. stateLen := len(qj.Status.State) if stateLen > 0 { - klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job %s because it was preempted, status=%+v\n", qj.Name, qj.Status) + klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job '%s/%s' because it was preempted, status=%+v", qj.Namespace, qj.Name, qj.Status) err = cc.Cleanup(qj) - klog.V(8).Infof("[manageQueueJob] Validation after deleting resources for AppWrapper Job %s because it was be preempted, status=%+v\n", qj.Name, qj.Status) + klog.V(8).Infof("[manageQueueJob] Validation after deleting resources for AppWrapper Job '%s/%s' because it was be preempted, status=%+v", qj.Namespace, qj.Name, qj.Status) if err != nil { - klog.Errorf("[manageQueueJob] Fail to delete resources for AppWrapper Job %s, err=%#v", qj.Name, err) + klog.Errorf("[manageQueueJob] Fail to delete resources for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err) return err } } @@ -2070,7 +2092,7 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Status.State = arbv1.AppWrapperStateEnqueued // add qj to qjqueue only when it is not in UnschedulableQ if cc.qjqueue.IfExistUnschedulableQ(qj) { - klog.V(10).Infof("[manageQueueJob] leaving %s to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(10).Infof("[manageQueueJob] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) return nil } @@ -2104,7 +2126,6 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool } // End of first execution of qj to add to qjqueue for ScheduleNext //Handle recovery condition - //FIXME there is still are race condition here. 
if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { // One more check to ensure AW is not the current active schedule object if cc.IsActiveAppWrapper(qj.Name, qj.Namespace) { @@ -2336,9 +2357,8 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool klog.V(10).Infof("[worker-manageQJ] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) klog.V(10).Infof("[worker-manageQJ] %s, %s: WorkerAfterDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) } - //FIXME use the updateStatusInEtcd - if _, err := cc.updateEtcd(qj, "[worker-manageQJ] -- set dispatched true"); err != nil { - klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", + if err := cc.updateStatusInEtcd(qj, "[worker-manageQJ] -- set dispatched true"); err != nil { + klog.Errorf("Failed to update status of AppWrapper %s/%s: %s", qj.Namespace, qj.Name, err) return err } @@ -2400,14 +2420,14 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { return nil } func (cc *XController) getAppWrapper(namespace string, name string, caller string) (*arbv1.AppWrapper, error) { - klog.V(4).Infof("[getAppWrapper] geting a copy of '%s/%s' when called by '%s'.", namespace, name, caller) + klog.V(5).Infof("[getAppWrapper] geting a copy of '%s/%s' when called by '%s'.", namespace, name, caller) apiCacheAWJob, err := cc.queueJobLister.AppWrappers(namespace).Get(name) // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level if err != nil { klog.Errorf("[getAppWrapper] geting a copy of '%s/%s' failed, when called by '%s', err=%v", namespace, name, caller, err) return nil, err } - klog.V(4).Infof("[getAppWrapper] get a copy of '%s/%s' suceeded when called by '%s'", namespace, name, caller) + klog.V(5).Infof("[getAppWrapper] get a copy of '%s/%s' suceeded when called by '%s'", namespace, name, caller) return apiCacheAWJob.DeepCopy(), nil } diff --git a/test/e2e-kuttl/quota-forest/99-cleanup.yaml b/test/e2e-kuttl/quota-forest/99-cleanup.yaml index 3998eb375..59a37b7d1 100644 --- a/test/e2e-kuttl/quota-forest/99-cleanup.yaml +++ b/test/e2e-kuttl/quota-forest/99-cleanup.yaml @@ -6,4 +6,4 @@ error: [] unitTest: false delete: [] commands: - - command: kubectl delete namespace test \ No newline at end of file + - command: kubectl delete namespace test --wait diff --git a/test/e2e/queue.go b/test/e2e/queue.go index eb4f8c111..a748bc239 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -140,7 +140,7 @@ var _ = Describe("AppWrapper E2E Test", func() { }) It("MCAD CPU Requeuing - Completion After Enough Requeuing Times Test", func() { - fmt.Fprintf(os.Stdout, "[e2e] MCAD CPU Requeuing Test - Started.\n") + fmt.Fprintf(os.Stdout, "[e2e] Completion After Enough Requeuing Times Test - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper @@ -414,7 +414,7 @@ var _ = Describe("AppWrapper E2E Test", func() { Expect(err).NotTo(HaveOccurred()) }) - It("MCAD Scheduling Fail Fast Preemption Test", func() { + FIt("MCAD Scheduling Fail Fast Preemption Test", func() { fmt.Fprintf(os.Stdout, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Started.\n") context := initTestContext() @@ -441,7 +441,7 @@ var _ = Describe("AppWrapper E2E Test", func() { err = waitAWPodsPending(context, aw2) Expect(err).NotTo(HaveOccurred()) - // This should fit on cluster after AW aw-deployment-1-700-cpu above is automatically 
preempted on + // This should fit on cluster after AW aw-deployment-1-850-cpu above is automatically preempted on // scheduling failure aw3 := createGenericDeploymentCustomPodResourcesWithCPUAW( context, appendRandomString("aw-ff-deployment-2-340-cpu"), "340m", "340m", 2, 60) @@ -551,17 +551,13 @@ var _ = Describe("AppWrapper E2E Test", func() { time.Sleep(1 * time.Minute) aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) if err != nil { - fmt.Fprint(GinkgoWriter, "Error getting status") + fmt.Fprintf(GinkgoWriter, "Error getting status, %v\n", err) } - pass := false + Expect(err).Should(Succeed()) + Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateCompleted)) fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) - if aw1.Status.State == arbv1.AppWrapperStateCompleted { - pass = true - } - Expect(pass).To(BeTrue()) appwrappers = append(appwrappers, aw) fmt.Fprintf(os.Stdout, "[e2e] MCAD Job Completion Test - Completed.\n") - }) It("MCAD Multi-Item Job Completion Test", func() { diff --git a/test/yaml/0006-aw-init-containers.yaml b/test/yaml/0006-aw-init-containers.yaml new file mode 100644 index 000000000..4b6c7a690 --- /dev/null +++ b/test/yaml/0006-aw-init-containers.yaml @@ -0,0 +1,86 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: bronnze-init-job-0006 + namespace: test + labels: + quota_context: bronze + quota_service: gold +spec: + schedulingSpec: + minAvailable: 3 + requeuing: + timeInSeconds: 60 + growthType: exponential + maxNumRequeuings: 0 + resources: + GenericItems: + - replicas: 1 + completionstatus: Complete + custompodresources: + - replicas: 1 + requests: + cpu: 500m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 500m + nvidia.com/gpu: 0 + memory: 300Mi + generictemplate: + apiVersion: batch/v1 + kind: Job + metadata: + name: bronnze-init-job-0006 + namespace: test + labels: + appwrapper.mcad.ibm.com: bronnze-init-job-0006 + spec: + parallelism: 3 + terminationGracePeriodSeconds: 1 + restartPolicy: Never + template: + metadata: + name: bronnze-init-job-0006 + namespace: test + labels: + appwrapper.mcad.ibm.com: bronnze-init-job-0006 + spec: + terminationGracePeriodSeconds: 1 + restartPolicy: Never + initContainers: + - name: job-init-container + image: 'k8s.gcr.io/busybox:latest' + imagePullPolicy: IfNotPresent + command: + - sh + - '-c' + - | + sleep 200 + resources: + requests: + cpu: 500m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 500m + nvidia.com/gpu: 0 + memory: 300Mi + containers: + - name: job-container + image: 'k8s.gcr.io/busybox:latest' + imagePullPolicy: IfNotPresent + command: + - sh + - '-c' + - | + sleep 10 + resources: + requests: + cpu: 500m + nvidia.com/gpu: 0 + memory: 300Mi + limits: + cpu: 500m + nvidia.com/gpu: 0 + memory: 300Mi From ef5c6a871beeff82108a0dad4de3831e2630f27e Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Tue, 27 Jun 2023 15:38:29 +0300 Subject: [PATCH 07/23] Removed focused test. 
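
The previous commit left the fail-fast preemption spec marked as a Ginkgo focused spec (FIt), presumably for local debugging. With any focused spec present, Ginkgo runs only the focused specs and skips the rest of the e2e suite, so the one-character revert below restores full test execution. Roughly:

    // before: focused spec, only this test runs
    FIt("MCAD Scheduling Fail Fast Preemption Test", func() { ... })
    // after: regular spec, the whole suite runs
    It("MCAD Scheduling Fail Fast Preemption Test", func() { ... })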
--- test/e2e/queue.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/queue.go b/test/e2e/queue.go index a748bc239..eca720c27 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -414,7 +414,7 @@ var _ = Describe("AppWrapper E2E Test", func() { Expect(err).NotTo(HaveOccurred()) }) - FIt("MCAD Scheduling Fail Fast Preemption Test", func() { + It("MCAD Scheduling Fail Fast Preemption Test", func() { fmt.Fprintf(os.Stdout, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Started.\n") context := initTestContext() From 8763ebe4b181146065305c69a863e61e7edbbb3b Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Wed, 28 Jun 2023 13:47:56 +0300 Subject: [PATCH 08/23] Bug fixes and CRD documentation updates. --- .../crd/bases/mcad.ibm.com_appwrappers.yaml | 10 +++++ .../crds/mcad.ibm.com_appwrappers.yaml | 10 +++++ .../queuejob/queuejob_controller_ex.go | 43 ++++++++++--------- test/e2e/queue.go | 4 +- test/e2e/util.go | 12 +++--- 5 files changed, 50 insertions(+), 29 deletions(-) diff --git a/config/crd/bases/mcad.ibm.com_appwrappers.yaml b/config/crd/bases/mcad.ibm.com_appwrappers.yaml index 040655b35..c588bd4c3 100644 --- a/config/crd/bases/mcad.ibm.com_appwrappers.yaml +++ b/config/crd/bases/mcad.ibm.com_appwrappers.yaml @@ -821,6 +821,16 @@ spec: (is this different from the MinAvailable from JobStatus) format: int32 type: integer + number-of-requeueings: + description: Field to keep track of how many times a requeuing event has been triggered + format: int32 + type: integer + default: 0 + requeueing-time-seconds: + description: Field to keep track of total number of seconds spent in requeueing + format: int32 + type: integer + default: 0 type: object required: - spec diff --git a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml index 040655b35..c588bd4c3 100644 --- a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml +++ b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml @@ -821,6 +821,16 @@ spec: (is this different from the MinAvailable from JobStatus) format: int32 type: integer + number-of-requeueings: + description: Field to keep track of how many times a requeuing event has been triggered + format: int32 + type: integer + default: 0 + requeueing-time-seconds: + description: Field to keep track of total number of seconds spent in requeueing + format: int32 + type: integer + default: 0 type: object required: - spec diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 44ab1980d..afe5d6a4b 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -469,9 +469,7 @@ func (qjm *XController) PreemptQueueJobs() { newjob.Status.Running = 0 updateNewJob = newjob.DeepCopy() - // if retryErr := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- DispatchDeadlineExceeded"); retryErr != nil { - updatedAW, retryErr := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- DispatchDeadlineExceeded") - if retryErr != nil { + if retryErr := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- DispatchDeadlineExceeded"); retryErr != nil { if apiErrors.IsNotFound(retryErr) { return nil } @@ -479,7 +477,7 @@ func (qjm *XController) PreemptQueueJobs() { return retryErr } //cannot use cleanup AW, since it puts AW back in running state - 
qjm.qjqueue.AddUnschedulableIfNotPresent(updatedAW) + go qjm.qjqueue.AddUnschedulableIfNotPresent(updateNewJob) //Move to next AW return nil @@ -497,6 +495,9 @@ func (qjm *XController) PreemptQueueJobs() { newjob.Status.Conditions[index] = *cond.DeepCopy() } + if aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds == 0 { + aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds = aw.Spec.SchedSpec.Requeuing.TimeInSeconds + } if aw.Spec.SchedSpec.Requeuing.GrowthType == "exponential" { newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.TimeInSeconds } else if aw.Spec.SchedSpec.Requeuing.GrowthType == "linear" { @@ -509,7 +510,7 @@ func (qjm *XController) PreemptQueueJobs() { } } - if newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings > 0 && newjob.Status.NumberOfRequeueings == newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings { + if newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings > 0 && newjob.Spec.SchedSpec.Requeuing.NumRequeuings == newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings { newjob.Status.State = arbv1.AppWrapperStateDeleted cleanAppWrapper = true } else { @@ -550,7 +551,6 @@ func (qjm *XController) PreemptQueueJobs() { //Only back-off AWs that are in state running and not in state Failed if updateNewJob.Status.State != arbv1.AppWrapperStateFailed { klog.V(4).Infof("[PreemptQueueJobs] Adding preempted AppWrapper %s/%s to backoff queue.", aw.Name, aw.Namespace) - klog.V(4).Infof("[PreemptQueueJobs] AppWrapper '%s/%s' status: CanRun %d", aw.Name, aw.Namespace, aw.Status.CanRun) go qjm.backoff(updateNewJob, "PreemptionTriggered", string(message)) } } @@ -658,7 +658,7 @@ func (qjm *XController) GetQueueJobsEligibleForPreemption() []*arbv1.AppWrapper condition = lastCondition } - requeuingTimeInSeconds := value.Spec.SchedSpec.Requeuing.TimeInSeconds + requeuingTimeInSeconds := value.Status.RequeueingTimeInSeconds minAge := condition.LastTransitionMicroTime.Add(time.Duration(requeuingTimeInSeconds) * time.Second) currentTime := time.Now() @@ -666,19 +666,15 @@ func (qjm *XController) GetQueueJobsEligibleForPreemption() []*arbv1.AppWrapper continue } - if value.Spec.SchedSpec.Requeuing.InitialTimeInSeconds == 0 { - value.Spec.SchedSpec.Requeuing.InitialTimeInSeconds = value.Spec.SchedSpec.Requeuing.TimeInSeconds - } - if replicas > 0 { - klog.V(3).Infof("AppWrapper %s is eligible for preemption Running: %v - minAvailable: %v , Succeeded: %v !!! \n", value.Name, value.Status.Running, replicas, value.Status.Succeeded) + klog.V(3).Infof("AppWrapper '%s/%s' is eligible for preemption Running: %d - minAvailable: %d , Succeeded: %d !!!", value.Namespace, value.Name, value.Status.Running, replicas, value.Status.Succeeded) qjobs = append(qjobs, value) } } else { // Preempt when schedulingSpec stanza is not set but pods fails scheduling. // ignore co-scheduler pods if len(value.Status.PendingPodConditions) > 0 { - klog.V(3).Infof("AppWrapper %s is eligible for preemption Running: %v , Succeeded: %v due to failed scheduling !!! 
\n", value.Name, value.Status.Running, value.Status.Succeeded) + klog.V(3).Infof("AppWrapper '%s/%s' is eligible for preemption Running: %d , Succeeded: %d due to failed scheduling !!!", value.Namespace, value.Status.Running, value.Status.Succeeded) qjobs = append(qjobs, value) } } @@ -1166,8 +1162,10 @@ func (qjm *XController) ScheduleNext() { qj.ResourceVersion, qj.Status) apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] -- get fresh copy after queue pop") - // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level if retryErr != nil { + if apiErrors.IsNotFound(retryErr) { + return nil + } klog.Errorf("[ScheduleNext] Unable to get AW %s from API cache &aw=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, retryErr) return retryErr } @@ -1220,6 +1218,9 @@ func (qjm *XController) ScheduleNext() { klog.V(3).Infof("[ScheduleNext] activeQ.Pop_afterPriorityUpdate %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), qj, qj.ResourceVersion, qj.Status) apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] -- after dynamic priority pop") if retryErr != nil { + if apiErrors.IsNotFound(retryErr) { + return nil + } klog.Errorf("[ScheduleNext] failed to get a fresh copy of the app wrapper '%s/%s', err=%#v", qj.Namespace, qj.Name, retryErr) return err } @@ -1344,6 +1345,9 @@ func (qjm *XController) ScheduleNext() { if updateLabels { tempAW, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Agent Mode] update labels") if retryErr != nil { + if apiErrors.IsNotFound(retryErr) { + return nil + } klog.Warningf("[ScheduleNext] [Agent Mode] Failed to added get fresh copy of the app wrapper '%s/%s' to update quota lables, err = %v", qj.Namespace, qj.Name, retryErr) return retryErr } @@ -1617,6 +1621,7 @@ func (qjm *XController) backoff(q *arbv1.AppWrapper, reason string, message stri if err != nil { return err } + q.Status.DeepCopyInto(&apiCacheAWJob.Status) apiCacheAWJob.Status.QueueJobState = arbv1.AppWrapperCondBackoff apiCacheAWJob.Status.FilterIgnore = true // update QueueJobState only, no work needed // Update condition @@ -1625,7 +1630,6 @@ func (qjm *XController) backoff(q *arbv1.AppWrapper, reason string, message stri klog.Warningf("[backoff] Failed to updated AW status in etcd '%s/%s'. Continuing with possible stale object without updating conditions. 
Retrying.", apiCacheAWJob.Namespace, apiCacheAWJob.Name) return err } - apiCacheAWJob.DeepCopyInto(q) return nil }) if err != nil { @@ -1794,9 +1798,6 @@ func (cc *XController) updateQueueJob(oldObj, newObj interface{}) { if equality.Semantic.DeepEqual(newQJ.Status, oldQJ.Status) { klog.Warningf("[Informer-updateQJ] No change to status field of AppWrapper: '%s/%s', oldAW=%+v, newAW=%+v.", newQJ.Namespace, newQJ.Name, oldQJ.Status, newQJ.Status) - //if newQJ.Status.FilterIgnore { - // klog.Warningf("[Informer-updateQJ] Status.FilterIgnore set for AppWrapper: '%s/%s' not enqueing", newQJ.Namespace, newQJ.Name) - //} } klog.V(4).Infof("[Informer-updateQJ] '%s/%s' *Delay=%.6f seconds normal enqueue Version=%s Status=%v", newQJ.Namespace, newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ.ResourceVersion, newQJ.Status) @@ -1935,7 +1936,7 @@ func (cc *XController) worker() { if queuejob == nil { if acc, err := meta.Accessor(obj); err != nil { - klog.Warningf("[worker] Failed to get AppWrapper for %v/%v", acc.GetNamespace(), acc.GetName()) + klog.Warningf("[worker] Failed to get AppWrapper for '%s/%s'", acc.GetNamespace(), acc.GetName()) } return nil @@ -1956,7 +1957,7 @@ func (cc *XController) worker() { if err00 := cc.enqueueIfNotPresent(item); err00 != nil { klog.Errorf("[worker] Failed to re-enqueue item, err %v", err00) } - klog.Warningf("[worker] Item re-enqueued", err) + klog.Warning("[worker] Item re-enqueued") return } } @@ -2210,7 +2211,7 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Status.FilterIgnore = true // update State & QueueJobState after dispatch err := cc.updateStatusInEtcd(qj, "manageQueueJob - afterEtcdDispatching") if err != nil { - klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) + klog.Errorf("[manageQueueJob] Error updating etc for AW job='%s/%s' Status=%v err=%v", qj.Namespace, qj.Name, qj.Status, err) return err } diff --git a/test/e2e/queue.go b/test/e2e/queue.go index eca720c27..c8d5cf260 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -217,7 +217,7 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createDeploymentAW(context, "aw-deployment-3") appwrappers = append(appwrappers, aw) - fmt.Fprintf(os.Stdout, "[e2e] Awaiting %d pods running for AW %s.\n", aw.Spec.SchedSpec.MinAvailable, aw.Name) + fmt.Fprintf(GinkgoWriter, "[e2e] Awaiting %d pods running for AW %s.\n", aw.Spec.SchedSpec.MinAvailable, aw.Name) err := waitAWPodsReady(context, aw) Expect(err).NotTo(HaveOccurred()) }) @@ -595,7 +595,7 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createAWGenericItemWithoutStatus(context, "aw-test-job-with-comp-44") err1 := waitAWPodsReady(context, aw) - fmt.Fprintf(os.Stdout, "The error is: %v", err1) + fmt.Fprintf(GinkgoWriter, "The error is: %v", err1) Expect(err1).To(HaveOccurred()) fmt.Fprintf(os.Stdout, "[e2e] MCAD GenericItem Without Status Test - Completed.\n") diff --git a/test/e2e/util.go b/test/e2e/util.go index 9dd2248af..cd56e325b 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1085,32 +1085,32 @@ func createDeploymentAWwith550CPU(context *context, name string) *arbv1.AppWrapp rb := []byte(`{"apiVersion": "apps/v1", "kind": "Deployment", "metadata": { - "name": "aw-deployment-2-550cpu", + "name": "`+name+`", "namespace": "test", "labels": { - "app": "aw-deployment-2-550cpu" + "app": "`+name+`" } }, "spec": { "replicas": 2, "selector": { "matchLabels": { - "app": "aw-deployment-2-550cpu" 
+ "app": "`+name+`" } }, "template": { "metadata": { "labels": { - "app": "aw-deployment-2-550cpu" + "app": "`+name+`" }, "annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "aw-deployment-2-550cpu" + "appwrapper.mcad.ibm.com/appwrapper-name": "`+name+`" } }, "spec": { "containers": [ { - "name": "aw-deployment-2-550cpu", + "name": "`+name+`", "image": "kicbase/echo-server:1.0", "resources": { "requests": { From ef9af3fed10692d71501f801d458b8bcb9537eea Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Wed, 28 Jun 2023 14:56:04 +0300 Subject: [PATCH 09/23] Fixed misspelling --- pkg/controller/queuejob/queuejob_controller_ex.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 33fc76efd..4d5310e8d 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -2432,11 +2432,11 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { return nil } func (cc *XController) getAppWrapper(namespace string, name string, caller string) (*arbv1.AppWrapper, error) { - klog.V(5).Infof("[getAppWrapper] geting a copy of '%s/%s' when called by '%s'.", namespace, name, caller) + klog.V(5).Infof("[getAppWrapper] getting a copy of '%s/%s' when called by '%s'.", namespace, name, caller) apiCacheAWJob, err := cc.queueJobLister.AppWrappers(namespace).Get(name) // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level if err != nil { - klog.Errorf("[getAppWrapper] geting a copy of '%s/%s' failed, when called by '%s', err=%v", namespace, name, caller, err) + klog.Errorf("[getAppWrapper] getting a copy of '%s/%s' failed, when called by '%s', err=%v", namespace, name, caller, err) return nil, err } klog.V(5).Infof("[getAppWrapper] get a copy of '%s/%s' suceeded when called by '%s'", namespace, name, caller) From 2edc4f2ded791fa596bef4fed003b608b1bf9769 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Wed, 28 Jun 2023 16:32:09 +0300 Subject: [PATCH 10/23] Fix bad merge --- .../queuejob/queuejob_controller_ex.go | 140 +++++++++--------- 1 file changed, 72 insertions(+), 68 deletions(-) diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 4d5310e8d..dca0a4723 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -42,8 +42,8 @@ import ( "sync" "time" - "github.com/gogo/protobuf/proto" "github.com/eapache/go-resiliency/retrier" + "github.com/gogo/protobuf/proto" qmutils "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/quotaplugins/util" "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/quota/quotaforestmanager" @@ -1324,80 +1324,84 @@ func (qjm *XController) ScheduleNext() { qjm.cache.GetUnallocatedResources(), priorityindex, qj, "") klog.Infof("[ScheduleNext] [Agent Mode] Appwrapper '%s/%s' with resources %v to be scheduled on aggregated idle resources %v", qj.Namespace, qj.Name, aggqj, resources) - // Assume preemption will remove low priroity AWs in the system, optimistically dispatch such AWs + // Assume preemption will remove low priroity AWs in the system, optimistically dispatch such AWs - if aggqj.LessEqual(resources) { - unallocatedHistogramMap := qjm.cache.GetUnallocatedHistograms() - if 
!qjm.nodeChecks(unallocatedHistogramMap, qj) { - klog.V(4).Infof("[ScheduleNext] Optimistic dispatch for AW %v in namespace %v requesting aggregated resources %v histogram for point in-time fragmented resources are available in the cluster %s", qj.Name, qj.Namespace, qjm.GetAggregatedResources(qj), proto.MarshalTextString(unallocatedHistogramMap["gpu"])) - } - // Now evaluate quota - fits := true - klog.V(4).Infof("[ScheduleNext] HOL available resourse successful check for %s at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - if qjm.serverOption.QuotaEnabled { - if qjm.quotaManager != nil { - // Quota tree design: - // - All AppWrappers without quota submission will consume quota from the 'default' node. - // - All quota trees in the system should have a 'default' node so AppWrappers without - // quota specification can be dispatched - // - If the AppWrapper doesn't have a quota label, then one is added for every tree with the 'default' value - // - Depending on how the 'default' node is configured, AppWrappers that don't specify quota could be - // preemptable by default (e.g., 'default' node with 'cpu: 0m' and 'memory: 0Mi' quota and 'hardLimit: false' - // such node borrows quota from other nodes already in the system) - apiCacheAWJob, err := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) - if err != nil { - klog.Errorf("[ScheduleNext] Failed to get AppWrapper from API Cache %v/%v: %v", - qj.Namespace, qj.Name, err) - continue - } - allTrees := qjm.quotaManager.GetValidQuotaLabels() - newLabels := make(map[string]string) - for key, value := range apiCacheAWJob.Labels { - newLabels[key] = value - } - updateLabels := false - for _, treeName := range allTrees { - if _, quotaSetForAW := newLabels[treeName]; !quotaSetForAW { - newLabels[treeName] = "default" - updateLabels = true + if aggqj.LessEqual(resources) { + unallocatedHistogramMap := qjm.cache.GetUnallocatedHistograms() + if !qjm.nodeChecks(unallocatedHistogramMap, qj) { + klog.V(4).Infof("[ScheduleNext] [Agent Mode] Optimistic dispatch for AW '%s/%s' requesting aggregated resources %v histogram for point in-time fragmented resources are available in the cluster %s", + qj.Name, qj.Namespace, qjm.GetAggregatedResources(qj), proto.MarshalTextString(unallocatedHistogramMap["gpu"])) + } + // Now evaluate quota + fits := true + klog.V(4).Infof("[ScheduleNext] [Agent Mode] available resourse successful check for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", + qj.Name, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + if qjm.serverOption.QuotaEnabled { + if qjm.quotaManager != nil { + // Quota tree design: + // - All AppWrappers without quota submission will consume quota from the 'default' node. 
+ // - All quota trees in the system should have a 'default' node so AppWrappers without + // quota specification can be dispatched + // - If the AppWrapper doesn't have a quota label, then one is added for every tree with the 'default' value + // - Depending on how the 'default' node is configured, AppWrappers that don't specify quota could be + // preemptable by default (e.g., 'default' node with 'cpu: 0m' and 'memory: 0Mi' quota and 'hardLimit: false' + // such node borrows quota from other nodes already in the system) + allTrees := qjm.quotaManager.GetValidQuotaLabels() + newLabels := make(map[string]string) + for key, value := range qj.Labels { + newLabels[key] = value } - } - if updateLabels { - apiCacheAWJob.SetLabels(newLabels) - if err := qjm.updateEtcd(apiCacheAWJob, "ScheduleNext - setDefaultQuota"); err == nil { - klog.V(3).Infof("[ScheduleNext] Default quota added to AW %v", qj.Name) - } else { - klog.V(3).Infof("[ScheduleNext] Failed to added default quota to AW %v, skipping dispatch of AW", qj.Name) - return + updateLabels := false + for _, treeName := range allTrees { + if _, quotaSetForAW := newLabels[treeName]; !quotaSetForAW { + newLabels[treeName] = "default" + updateLabels = true + } } - } - var msg string - var preemptAWs []*arbv1.AppWrapper - quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(qj, aggqj, proposedPreemptions) - if quotaFits { - klog.Infof("[ScheduleNext] HOL quota evaluation successful %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - // Set any jobs that are marked for preemption - qjm.preemptAWJobs(preemptAWs) - } else { // Not enough free quota to dispatch appwrapper - dispatchFailedMessage = "Insufficient quota to dispatch AppWrapper." - if len(msg) > 0 { - dispatchFailedReason += " " - dispatchFailedReason += msg + if updateLabels { + tempAW, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Agent Mode] update labels") + if retryErr != nil { + if apiErrors.IsNotFound(retryErr) { + return nil + } + klog.Warningf("[ScheduleNext] [Agent Mode] Failed to added get fresh copy of the app wrapper '%s/%s' to update quota lables, err = %v", qj.Namespace, qj.Name, retryErr) + return retryErr + } + tempAW.SetLabels(newLabels) + updatedAW, retryErr := qjm.updateEtcd(tempAW, "ScheduleNext [Agent Mode] - setDefaultQuota") + if retryErr != nil { + return retryErr + } + klog.Infof("[ScheduleNext] [Agent Mode] Default quota added to AW '%s/%s'", qj.Namespace, qj.Name) + updatedAW.DeepCopyInto(qj) + } + var msg string + var preemptAWs []*arbv1.AppWrapper + quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(qj, aggqj, proposedPreemptions) + if quotaFits { + klog.Infof("[ScheduleNext] HOL quota evaluation successful %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + // Set any jobs that are marked for preemption + qjm.preemptAWJobs(preemptAWs) + } else { // Not enough free quota to dispatch appwrapper + dispatchFailedMessage = "Insufficient quota to dispatch AppWrapper." 
+ if len(msg) > 0 { + dispatchFailedReason += " " + dispatchFailedReason += msg + } + klog.V(3).Infof("[ScheduleNext] [Agent Mode] HOL Blocking by %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s, due to quota limits", + qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, msg) } - klog.V(3).Infof("[ScheduleNext] HOL Blocking by %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s, due to quota limits", - qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, msg) + fits = quotaFits + } else { + fits = false + //Quota manager not initialized + dispatchFailedMessage = "Quota evaluation is enabled but not initialized. Insufficient quota to dispatch AppWrapper." + klog.Errorf("[ScheduleNext] [Agent Mode] Quota evaluation is enabled but not initialized. AppWrapper '%s/%s' does not have enough quota", qj.Namespace, qj.Name) } - fits = quotaFits } else { - fits = false - //Quota manager not initialized - dispatchFailedMessage = "Quota evaluation is enabled but not initialized. Insufficient quota to dispatch AppWrapper." - klog.Errorf("[ScheduleNext] Quota evaluation is enabled but not initialized. AppWrapper %s/%s does not have enough quota\n", qj.Name, qj.Namespace) + klog.V(4).Infof("[ScheduleNext] [Agent Mode] HOL quota evaluation not enabled for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, + qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } - } else { - klog.V(4).Infof("[ScheduleNext] HOL quota evaluation not enabled for %s at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - } - // If quota evalauation sucedeed or quota evaluation not enabled set the appwrapper to be dispatched if fits { // aw is ready to go! From ed9227b9d960af94755730fdbea8c61ae985b5f2 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Fri, 30 Jun 2023 15:56:58 +0300 Subject: [PATCH 11/23] E2E test improvements --- test/e2e/queue.go | 35 ++++++++++++++++++++++++++++------- test/e2e/util.go | 20 ++++++++++---------- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/test/e2e/queue.go b/test/e2e/queue.go index c8d5cf260..30a4d3839 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -42,6 +42,7 @@ import ( . 
"github.com/onsi/gomega" arbv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/wait" ) var _ = Describe("AppWrapper E2E Test", func() { @@ -724,7 +725,7 @@ var _ = Describe("AppWrapper E2E Test", func() { }) - It("Create AppWrapper - Generic 50 Deployment Only - 2 pods each", func() { + FIt("Create AppWrapper - Generic 50 Deployment Only - 2 pods each", func() { fmt.Fprintf(os.Stdout, "[e2e] Generic 50 Deployment Only - 2 pods each - Started.\n") context := initTestContext() @@ -749,14 +750,34 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createGenericDeploymentWithCPUAW(context, name, cpuDemand, replicas) aws = append(aws, aw) } - // Give the deployments time to create pods - // FIXME: do not assume that the pods are in running state in the order of submission. - time.Sleep(2 * time.Minute) - for i := 0; i < len(aws); i++ { - err := waitAWReadyQuiet(context, aws[i]) - Expect(err).NotTo(HaveOccurred()) + time.Sleep(90 * time.Second) + uncompletedAWS := aws + // wait for pods to become ready, don't assume that they are ready in the order of submission. + err := wait.Poll(500*time.Millisecond, 3*time.Minute, func() (done bool, err error) { + t := time.Now() + toCheckAWS := make([]*arbv1.AppWrapper, 0, len(aws)) + for _, aw := range uncompletedAWS { + err := waitAWPodsReadyEx(context, aw, 100*time.Millisecond, int(aw.Spec.SchedSpec.MinAvailable), true) + if err != nil { + toCheckAWS = append(toCheckAWS, aw) + } + } + uncompletedAWS = toCheckAWS + fmt.Fprintf(GinkgoWriter, "[e2e] Generic 50 Deployment Only - 2 pods each - There are %d app wrappers without ready pods at time %s\n", len(toCheckAWS), t.Format(time.RFC3339)) + if len(toCheckAWS) == 0 { + return true, nil + } + return false, nil + }) + if err != nil { + fmt.Fprintf(GinkgoWriter, "[e2e] Generic 50 Deployment Only - 2 pods each - There are %d app wrappers without ready pods, err = %v\n", len(uncompletedAWS), err) + for _, uaw := range uncompletedAWS { + fmt.Fprintf(GinkgoWriter, "[e2e] Generic 50 Deployment Only - 2 pods each - Uncompleted AW '%s/%s' with status %v\n", uaw.Namespace, uaw.Name, uaw.Status) + } } + Expect(err).Should(Succeed(), "All app wrappers should have completed") + fmt.Fprintf(os.Stdout, "[e2e] Generic 50 Deployment Only - 2 pods each - Completed, awaiting app wrapper clean up.\n") }) /* diff --git a/test/e2e/util.go b/test/e2e/util.go index cd56e325b..df0b37fb0 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -696,7 +696,7 @@ func awNamespacePhase(ctx *context, aw *arbv1.AppWrapper, phase []v1.NamespacePh } func waitAWPodsReady(ctx *context, aw *arbv1.AppWrapper) error { - return waitAWPodsReadyEx(ctx, aw, int(aw.Spec.SchedSpec.MinAvailable), false) + return waitAWPodsReadyEx(ctx, aw, ninetySeconds, int(aw.Spec.SchedSpec.MinAvailable), false) } func waitAWPodsCompleted(ctx *context, aw *arbv1.AppWrapper, timeout time.Duration) error { @@ -708,7 +708,7 @@ func waitAWPodsNotCompleted(ctx *context, aw *arbv1.AppWrapper) error { } func waitAWReadyQuiet(ctx *context, aw *arbv1.AppWrapper) error { - return waitAWPodsReadyEx(ctx, aw, int(aw.Spec.SchedSpec.MinAvailable), true) + return waitAWPodsReadyEx(ctx, aw, ninetySeconds, int(aw.Spec.SchedSpec.MinAvailable), true) } func waitAWAnyPodsExists(ctx *context, aw *arbv1.AppWrapper) error { @@ -736,8 +736,8 @@ func waitAWPending(ctx *context, aw *arbv1.AppWrapper) error { []v1.PodPhase{v1.PodPending}, int(aw.Spec.SchedSpec.MinAvailable), 
false)) } -func waitAWPodsReadyEx(ctx *context, aw *arbv1.AppWrapper, taskNum int, quite bool) error { - return wait.Poll(100*time.Millisecond, ninetySeconds, awPodPhase(ctx, aw, +func waitAWPodsReadyEx(ctx *context, aw *arbv1.AppWrapper, waitDuration time.Duration, taskNum int, quite bool) error { + return wait.Poll(100*time.Millisecond, waitDuration, awPodPhase(ctx, aw, []v1.PodPhase{v1.PodRunning, v1.PodSucceeded}, taskNum, quite)) } @@ -1085,32 +1085,32 @@ func createDeploymentAWwith550CPU(context *context, name string) *arbv1.AppWrapp rb := []byte(`{"apiVersion": "apps/v1", "kind": "Deployment", "metadata": { - "name": "`+name+`", + "name": "` + name + `", "namespace": "test", "labels": { - "app": "`+name+`" + "app": "` + name + `" } }, "spec": { "replicas": 2, "selector": { "matchLabels": { - "app": "`+name+`" + "app": "` + name + `" } }, "template": { "metadata": { "labels": { - "app": "`+name+`" + "app": "` + name + `" }, "annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "`+name+`" + "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" } }, "spec": { "containers": [ { - "name": "`+name+`", + "name": "` + name + `", "image": "kicbase/echo-server:1.0", "resources": { "requests": { From 7b47bef070c5a8f53ecedab3892dab2f149abfa8 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Fri, 30 Jun 2023 15:58:25 +0300 Subject: [PATCH 12/23] Removed test focus --- test/e2e/queue.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/queue.go b/test/e2e/queue.go index 30a4d3839..ed8b021a5 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -725,7 +725,7 @@ var _ = Describe("AppWrapper E2E Test", func() { }) - FIt("Create AppWrapper - Generic 50 Deployment Only - 2 pods each", func() { + It("Create AppWrapper - Generic 50 Deployment Only - 2 pods each", func() { fmt.Fprintf(os.Stdout, "[e2e] Generic 50 Deployment Only - 2 pods each - Started.\n") context := initTestContext() From 58a4f94342df13e1c427a61046267649ece1fd06 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Mon, 3 Jul 2023 14:42:22 +0300 Subject: [PATCH 13/23] Fixed a few bugs with preemption. Updated test messages. Minor cleanups in the e2e tests. 
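The preemption fix below changes how Status.RequeueingTimeInSeconds grows: the first
requeuing event uses the configured timeInSeconds, and with growthType "exponential"
every later event doubles the accumulated value instead of adding the configured value
again; GetQueueJobsEligibleForPreemption then prefers the accumulated status value over
the spec fields when computing the minimum age before the next preemption check. A
minimal standalone sketch of that growth behaviour (the helper name and the main
function are illustrative, not part of the controller):

    package main

    import "fmt"

    // nextRequeueWait mirrors the intended back-off: the first event uses
    // the configured initial wait; afterwards "exponential" doubles the
    // accumulated wait, while "linear" keeps adding the initial value.
    func nextRequeueWait(current, initial int, growthType string) int {
        if current == 0 {
            return initial
        }
        switch growthType {
        case "exponential":
            return current * 2
        case "linear":
            return current + initial
        default:
            return current
        }
    }

    func main() {
        wait := 0
        for i := 0; i < 4; i++ {
            wait = nextRequeueWait(wait, 60, "exponential")
            fmt.Println(wait) // prints 60, 120, 240, 480
        }
    }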
--- .../queuejob/queuejob_controller_ex.go | 29 ++-- test/e2e/queue.go | 14 +- test/yaml/0007-fast-preemption-test.yaml | 146 ++++++++++++++++++ 3 files changed, 173 insertions(+), 16 deletions(-) create mode 100644 test/yaml/0007-fast-preemption-test.yaml diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 834dd3db1..37c7c883f 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -500,7 +500,11 @@ func (qjm *XController) PreemptQueueJobs() { aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds = aw.Spec.SchedSpec.Requeuing.TimeInSeconds } if aw.Spec.SchedSpec.Requeuing.GrowthType == "exponential" { - newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.TimeInSeconds + if newjob.Status.RequeueingTimeInSeconds == 0 { + newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.TimeInSeconds + } else { + newjob.Status.RequeueingTimeInSeconds += newjob.Status.RequeueingTimeInSeconds + } } else if aw.Spec.SchedSpec.Requeuing.GrowthType == "linear" { newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds } @@ -658,8 +662,15 @@ func (qjm *XController) GetQueueJobsEligibleForPreemption() []*arbv1.AppWrapper } else { condition = lastCondition } + var requeuingTimeInSeconds int + if value.Status.RequeueingTimeInSeconds > 0 { + requeuingTimeInSeconds = value.Status.RequeueingTimeInSeconds + } else if value.Spec.SchedSpec.Requeuing.InitialTimeInSeconds == 0 { + requeuingTimeInSeconds = value.Spec.SchedSpec.Requeuing.TimeInSeconds + } else { + requeuingTimeInSeconds = value.Spec.SchedSpec.Requeuing.InitialTimeInSeconds + } - requeuingTimeInSeconds := value.Status.RequeueingTimeInSeconds minAge := condition.LastTransitionMicroTime.Add(time.Duration(requeuingTimeInSeconds) * time.Second) currentTime := time.Now() @@ -2344,7 +2355,7 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool // Cleanup function func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { - klog.V(3).Infof("[Cleanup] begin AppWrapper %s Version=%s Status=%+v\n", appwrapper.Name, appwrapper.ResourceVersion, appwrapper.Status) + klog.V(3).Infof("[Cleanup] begin AppWrapper '%s/%s' Version=%s Status=%v", appwrapper.Namespace, appwrapper.Name, appwrapper.ResourceVersion, appwrapper.Status) if !cc.isDispatcher { if appwrapper.Spec.AggrResources.Items != nil { @@ -2352,8 +2363,8 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { for _, ar := range appwrapper.Spec.AggrResources.Items { err00 := cc.qjobResControls[ar.Type].Cleanup(appwrapper, &ar) if err00 != nil { - klog.Errorf("[Cleanup] Error deleting item %s from job=%s Status=%+v err=%+v.", - ar.Type, appwrapper.Name, appwrapper.Status, err00) + klog.Errorf("[Cleanup] Error deleting item %s from app wrapper='%s/%s' Status=%+v err=%+v.", + ar.Type, appwrapper.Namespace, appwrapper.Name, appwrapper.Status, err00) } } } @@ -2361,10 +2372,10 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { for _, ar := range appwrapper.Spec.AggrResources.GenericItems { genericResourceName, gvk, err00 := cc.genericresources.Cleanup(appwrapper, &ar) if err00 != nil { - klog.Errorf("[Cleanup] Error deleting generic item %s, from app wrapper=%s Status=%+v err=%+v.", - genericResourceName, appwrapper.Name, appwrapper.Status, err00) + klog.Errorf("[Cleanup] Error deleting generic item %s, from app wrapper='%s/%s' Status=%+v err=%+v.", + 
genericResourceName, appwrapper.Namespace, appwrapper.Name, appwrapper.Status, err00) } - klog.Info("[Cleanup] Delete generic item %s, GVK=%s.%s.%s from app wrapper=%s Status=%+v", + klog.Infof("[Cleanup] Delete generic item %s, GVK=%s.%s.%s from app wrapper=%s Status=%+v", genericResourceName, gvk.Group, gvk.Version, gvk.Kind, appwrapper.Name, appwrapper.Status) } } @@ -2389,7 +2400,7 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { appwrapper.Status.Running = 0 appwrapper.Status.Succeeded = 0 appwrapper.Status.Failed = 0 - klog.V(10).Infof("[Cleanup] end AppWrapper %s Version=%s Status=%+v\n", appwrapper.Name, appwrapper.ResourceVersion, appwrapper.Status) + klog.V(10).Infof("[Cleanup] end AppWrapper '%s/%s' Version=%s Status=%+v", appwrapper.Namespace, appwrapper.Name, appwrapper.ResourceVersion, appwrapper.Status) return nil } diff --git a/test/e2e/queue.go b/test/e2e/queue.go index ed8b021a5..2b5b38376 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -156,8 +156,8 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createJobAWWithInitContainer(context, "aw-job-3-init-container", 60, "exponential", 0) appwrappers = append(appwrappers, aw) - err := waitAWPodsCompleted(context, aw, 720*time.Second) // This test waits for 10 minutes to make sure all PODs complete - Expect(err).NotTo(HaveOccurred()) + err := waitAWPodsCompleted(context, aw, 12*time.Minute) // This test waits for 12 minutes to make sure all PODs complete + Expect(err).NotTo(HaveOccurred(), "Waiting for the pods to be completed") }) It("MCAD CPU Requeuing - Deletion After Maximum Requeuing Times Test", func() { @@ -611,14 +611,14 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createGenericJobAWWithScheduleSpec(context, "aw-test-job-with-scheduling-spec") err1 := waitAWPodsReady(context, aw) - Expect(err1).NotTo(HaveOccurred()) + Expect(err1).NotTo(HaveOccurred(), "Waiting for pods to be ready") err2 := waitAWPodsCompleted(context, aw, 90*time.Second) - Expect(err2).NotTo(HaveOccurred()) + Expect(err2).NotTo(HaveOccurred(), "Waiting for pods to be completed") // Once pods are completed, we wait for them to see if they change their status to anything BUT "Completed" // which SHOULD NOT happen because the job is done err3 := waitAWPodsNotCompleted(context, aw) - Expect(err3).To(HaveOccurred()) + Expect(err3).To(HaveOccurred(), "Waiting for pods not to be completed") appwrappers = append(appwrappers, aw) fmt.Fprintf(os.Stdout, "[e2e] MCAD Job Completion No-requeue Test - Completed.\n") @@ -751,7 +751,7 @@ var _ = Describe("AppWrapper E2E Test", func() { aws = append(aws, aw) } // Give the deployments time to create pods - time.Sleep(90 * time.Second) + time.Sleep(70 * time.Second) uncompletedAWS := aws // wait for pods to become ready, don't assume that they are ready in the order of submission. 
err := wait.Poll(500*time.Millisecond, 3*time.Minute, func() (done bool, err error) { @@ -773,7 +773,7 @@ var _ = Describe("AppWrapper E2E Test", func() { if err != nil { fmt.Fprintf(GinkgoWriter, "[e2e] Generic 50 Deployment Only - 2 pods each - There are %d app wrappers without ready pods, err = %v\n", len(uncompletedAWS), err) for _, uaw := range uncompletedAWS { - fmt.Fprintf(GinkgoWriter, "[e2e] Generic 50 Deployment Only - 2 pods each - Uncompleted AW '%s/%s' with status %v\n", uaw.Namespace, uaw.Name, uaw.Status) + fmt.Fprintf(GinkgoWriter, "[e2e] Generic 50 Deployment Only - 2 pods each - Uncompleted AW '%s/%s'\n", uaw.Namespace, uaw.Name) } } Expect(err).Should(Succeed(), "All app wrappers should have completed") diff --git a/test/yaml/0007-fast-preemption-test.yaml b/test/yaml/0007-fast-preemption-test.yaml new file mode 100644 index 000000000..67b106bd6 --- /dev/null +++ b/test/yaml/0007-fast-preemption-test.yaml @@ -0,0 +1,146 @@ +#apiVersion: mcad.ibm.com/v1beta1 +#kind: AppWrapper +#metadata: +# name: aw-deployment-2-550cpu-bbskat +# namespace: test +#spec: +# schedulingSpec: +# minAvailable: 2 +# requeuing: +# growthType: exponential +# maxNumRequeuings: 0 +# maxTimeInSeconds: 0 +# timeInSeconds: 60 +# resources: +# Items: +# - allocatedreplicas: 0 +# priorityslope: 0 +# replicas: 1 +# template: +# apiVersion: apps/v1 +# kind: Deployment +# metadata: +# labels: +# app: aw-deployment-2-550cpu-bbskat +# name: aw-deployment-2-550cpu-bbskat +# namespace: test +# spec: +# replicas: 2 +# selector: +# matchLabels: +# app: aw-deployment-2-550cpu-bbskat +# template: +# metadata: +# annotations: +# appwrapper.mcad.ibm.com/appwrapper-name: aw-deployment-2-550cpu-bbskat +# labels: +# app: aw-deployment-2-550cpu-bbskat +# spec: +# containers: +# - image: kicbase/echo-server:1.0 +# name: aw-deployment-2-550cpu-bbskat +# ports: +# - containerPort: 80 +# resources: +# requests: +# cpu: 550m +--- +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: aw-ff-deployment-1-850-cpu-ccdc29 + namespace: test +spec: + schedulingSpec: + minAvailable: 1 + requeuing: + growthType: exponential + maxNumRequeuings: 0 + maxTimeInSeconds: 0 + numRequeuings: 0 + timeInSeconds: 60 + resources: + custompodresources: + - replicas: 1 + requests: + cpu: 850m + generictemplate: + apiVersion: apps/v1 + kind: Deployment + metadata: + labels: + app: aw-ff-deployment-1-850-cpu-wkh8pa + name: aw-ff-deployment-1-850-cpu-wkh8pa + namespace: test + spec: + replicas: 1 + selector: + matchLabels: + app: aw-ff-deployment-1-850-cpu-wkh8pa + template: + metadata: + annotations: + appwrapper.mcad.ibm.com/appwrapper-name: aw-ff-deployment-1-850-cpu-wkh8pa + labels: + app: aw-ff-deployment-1-850-cpu-wkh8pa + spec: + containers: + - image: kicbase/echo-server:1.0 + name: aw-ff-deployment-1-850-cpu-wkh8pa + ports: + - containerPort: 80 + resources: + requests: + cpu: 850m + priority: 0 + priorityslope: 0 +--- +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: aw-ff-deployment-2-340-cpu-w6xkoe + namespace: test +spec: + schedulingSpec: + minAvailable: 2 + requeuing: + growthType: exponential + maxNumRequeuings: 0 + timeInSeconds: 60 + resources: + GenericItems: + - allocated: 0 + custompodresources: + - replicas: 2 + requests: + cpu: 340m + generictemplate: + apiVersion: apps/v1 + kind: Deployment + metadata: + labels: + app: aw-ff-deployment-2-340-cpu-w6xkoe + name: aw-ff-deployment-2-340-cpu-w6xkoe + namespace: test + spec: + replicas: 2 + selector: + matchLabels: + app: 
aw-ff-deployment-2-340-cpu-w6xkoe + template: + metadata: + annotations: + appwrapper.mcad.ibm.com/appwrapper-name: aw-ff-deployment-2-340-cpu-w6xkoe + labels: + app: aw-ff-deployment-2-340-cpu-w6xkoe + spec: + containers: + - image: kicbase/echo-server:1.0 + name: aw-ff-deployment-2-340-cpu-w6xkoe + ports: + - containerPort: 80 + resources: + requests: + cpu: 340m + priority: 0 + priorityslope: 0 From 641a343def8dab5f981c4ebd1bdd4e6f251a7d82 Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Mon, 3 Jul 2023 17:33:21 +0300 Subject: [PATCH 14/23] Testcase updates --- test/e2e/queue.go | 46 ++++++++++++++++++++-------------------------- test/e2e/util.go | 14 +++++++------- 2 files changed, 27 insertions(+), 33 deletions(-) diff --git a/test/e2e/queue.go b/test/e2e/queue.go index 2b5b38376..e763630ba 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -428,19 +428,19 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Expecting pods for app wrapper: aw-deployment-2-550cpu") // This should not fit on any node but should dispatch because there is enough aggregated resources. aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW( - context, appendRandomString("aw-ff-deployment-1-850-cpu"), "850m", "850m", 1, 60) + context, appendRandomString("aw-ff-deployment-1-800-cpu"), "800m", "800m", 1, 60) appwrappers = append(appwrappers, aw2) err = waitAWAnyPodsExists(context, aw2) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Expecting pending pods for app wrapper: aw-ff-deployment-1-800-cpu") err = waitAWPodsPending(context, aw2) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Expecting pending pods (try 2) for app wrapper: aw-ff-deployment-1-800-cpu") // This should fit on cluster after AW aw-deployment-1-850-cpu above is automatically preempted on // scheduling failure @@ -449,17 +449,17 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw3) - // Wait for pods to get created, assumes preemption around 10 minutes - err = waitAWPodsExists(context, aw3, 720000*time.Millisecond) - Expect(err).NotTo(HaveOccurred()) + // Wait for pods to get created, assumes preemption around 12 minutes + err = waitAWPodsExists(context, aw3, 12*time.Minute) + Expect(err).NotTo(HaveOccurred(), "Expecting pods for app wrapper: aw-ff-deployment-2-340-cpu") // Make sure they are running err = waitAWPodsReady(context, aw3) Expect(err).NotTo(HaveOccurred()) - // Make sure pods from AW aw-deployment-1-700-cpu above do not exist proving preemption + // Make sure pods from AW aw-deployment-1-850-cpu above do not exist proving preemption err = waitAWAnyPodsExists(context, aw2) - Expect(err).To(HaveOccurred()) + Expect(err).To(HaveOccurred(), "Expecting no pods for app wrapper : aw-deployment-1-800-cpu") }) It("MCAD Bad Custom Pod Resources vs. 
Deployment Pod Resource Not Queuing Test", func() { @@ -501,21 +501,21 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Waiting for pods to be ready for app wrapper: aw-deployment-2-550cpu") // This should fit on cluster but customPodResources is incorrect so AW pods are not created aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW( - context, appendRandomString("aw-deployment-2-426-vs-425-cpu"), "426m", "425m", 2, 60) + context, appendRandomString("aw-deployment-2-427-vs-425-cpu"), "427m", "425m", 2, 60) appwrappers = append(appwrappers, aw2) err = waitAWAnyPodsExists(context, aw2) - Expect(err).To(HaveOccurred()) + Expect(err).To(HaveOccurred(), "Waiting for no pods to exist for app wrapper: aw-deployment-2-427-vs-425-cpu") }) - It("MCAD appwrapper timeout Test", func() { - fmt.Fprintf(os.Stdout, "[e2e] MCAD appwrapper timeout Test - Started.\n") + It("MCAD app wrapper timeout Test", func() { + fmt.Fprintf(os.Stdout, "[e2e] MCAD app wrapper timeout Test - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper appwrappersPtr := &appwrappers @@ -526,17 +526,11 @@ var _ = Describe("AppWrapper E2E Test", func() { Expect(err1).NotTo(HaveOccurred()) time.Sleep(60 * time.Second) aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) - if err != nil { - fmt.Fprintf(GinkgoWriter, "Error getting status") - } - pass := false - fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) - if aw1.Status.State == arbv1.AppWrapperStateFailed { - pass = true - } - Expect(pass).To(BeTrue()) + Expect(err).NotTo(HaveOccurred()) + fmt.Fprintf(GinkgoWriter, "[e2e] status of appwrapper: %v.\n", aw1.Status) + Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateFailed), "Expecting a failed state") appwrappers = append(appwrappers, aw) - fmt.Fprintf(os.Stdout, "[e2e] MCAD appwrapper timeout Test - Completed.\n") + fmt.Fprintf(os.Stdout, "[e2e] MCAD app wrapper timeout Test - Completed.\n") }) It("MCAD Job Completion Test", func() { @@ -663,14 +657,14 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Waiting for pods to be ready for app wrapper: aw-deployment-2-550cpu") // This should not fit on cluster aw2 := createDeploymentAWwith426CPU(context, appendRandomString("aw-deployment-2-426cpu")) appwrappers = append(appwrappers, aw2) err = waitAWAnyPodsExists(context, aw2) - Expect(err).To(HaveOccurred()) + Expect(err).To(HaveOccurred(), "No pods for app wrapper `aw-deployment-2-426cpu` are expected.") }) It("MCAD Deployment RuningHoldCompletion Test", func() { diff --git a/test/e2e/util.go b/test/e2e/util.go index df0b37fb0..e26e940e0 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -1490,36 +1490,36 @@ func createDeploymentAWwith426CPU(context *context, name string) *arbv1.AppWrapp rb := []byte(`{"apiVersion": "apps/v1", "kind": "Deployment", "metadata": { - "name": "aw-deployment-2-426cpu", + "name": "` + name + `", "namespace": "test", "labels": { - "app": "aw-deployment-2-426cpu" + "app": "` + name + `" } }, "spec": { "replicas": 2, "selector": { "matchLabels": { - "app": "aw-deployment-2-426cpu" + "app": "` + name + `" } }, "template": { "metadata": { "labels": { - "app": "aw-deployment-2-426cpu" + "app": "` + name + `" }, 
"annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "aw-deployment-2-426cpu" + "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" } }, "spec": { "containers": [ { - "name": "aw-deployment-2-426cpu", + "name": "` + name + `", "image": "kicbase/echo-server:1.0", "resources": { "requests": { - "cpu": "426m" + "cpu": "427m" } }, "ports": [ From 76e829cf996758fd3498766f0bad2ef9862454ff Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Tue, 4 Jul 2023 14:53:23 +0300 Subject: [PATCH 15/23] Added more messages to the e2e tests. --- test/e2e/queue.go | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) diff --git a/test/e2e/queue.go b/test/e2e/queue.go index e763630ba..ad0dac768 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -409,10 +409,10 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) err := waitAWAnyPodsExists(context, aw) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Expecting any pods for app wrapper: aw-deployment-2-550-vs-550-cpu") err = waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-2-550-vs-550-cpu") }) It("MCAD Scheduling Fail Fast Preemption Test", func() { @@ -432,15 +432,15 @@ var _ = Describe("AppWrapper E2E Test", func() { // This should not fit on any node but should dispatch because there is enough aggregated resources. aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW( - context, appendRandomString("aw-ff-deployment-1-800-cpu"), "800m", "800m", 1, 60) + context, appendRandomString("aw-ff-deployment-1-850-cpu"), "850m", "850m", 1, 60) appwrappers = append(appwrappers, aw2) err = waitAWAnyPodsExists(context, aw2) - Expect(err).NotTo(HaveOccurred(), "Expecting pending pods for app wrapper: aw-ff-deployment-1-800-cpu") + Expect(err).NotTo(HaveOccurred(), "Expecting pending pods for app wrapper: aw-ff-deployment-1-850-cpu") err = waitAWPodsPending(context, aw2) - Expect(err).NotTo(HaveOccurred(), "Expecting pending pods (try 2) for app wrapper: aw-ff-deployment-1-800-cpu") + Expect(err).NotTo(HaveOccurred(), "Expecting pending pods (try 2) for app wrapper: aw-ff-deployment-1-850-cpu") // This should fit on cluster after AW aw-deployment-1-850-cpu above is automatically preempted on // scheduling failure @@ -455,7 +455,7 @@ var _ = Describe("AppWrapper E2E Test", func() { // Make sure they are running err = waitAWPodsReady(context, aw3) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Expecting ready pods for app wrapper: aw-ff-deployment-2-340-cpu") // Make sure pods from AW aw-deployment-1-850-cpu above do not exist proving preemption err = waitAWAnyPodsExists(context, aw2) @@ -474,7 +474,7 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-2-550cpu") // This should not fit on cluster but customPodResources is incorrect so AW pods are created aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW( @@ -483,10 +483,10 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw2) err = waitAWAnyPodsExists(context, aw2) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Not expecting pods to exist for 
app wrapper: aw-deployment-2-425-vs-426-cpu") err = waitAWPodsReady(context, aw2) - Expect(err).To(HaveOccurred()) + Expect(err).To(HaveOccurred(), "Not expecting pods to exist for app wrapper: aw-deployment-2-425-vs-426-cpu") }) It("MCAD Bad Custom Pod Resources vs. Deployment Pod Resource Queuing Test 2", func() { @@ -523,11 +523,11 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createGenericAWTimeoutWithStatus(context, "aw-test-jobtimeout-with-comp-1") err1 := waitAWPodsReady(context, aw) - Expect(err1).NotTo(HaveOccurred()) + Expect(err1).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-test-jobtimeout-with-comp-1") time.Sleep(60 * time.Second) aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) - Expect(err).NotTo(HaveOccurred()) - fmt.Fprintf(GinkgoWriter, "[e2e] status of appwrapper: %v.\n", aw1.Status) + Expect(err).NotTo(HaveOccurred(), "Expecting no error when getting app wrapper status") + fmt.Fprintf(GinkgoWriter, "[e2e] status of app wrapper: %v.\n", aw1.Status) Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateFailed), "Expecting a failed state") appwrappers = append(appwrappers, aw) fmt.Fprintf(os.Stdout, "[e2e] MCAD app wrapper timeout Test - Completed.\n") @@ -564,21 +564,14 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createGenericJobAWWithMultipleStatus(context, "aw-test-job-with-comp-ms-21") err1 := waitAWPodsReady(context, aw) - Expect(err1).NotTo(HaveOccurred()) + Expect(err1).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: 'aw-test-job-with-comp-ms-21'") time.Sleep(1 * time.Minute) aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) - if err != nil { - fmt.Fprint(GinkgoWriter, "Error getting status") - } - pass := false - fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) - if aw1.Status.State == arbv1.AppWrapperStateCompleted { - pass = true - } - Expect(pass).To(BeTrue()) + Expect(err).NotTo(HaveOccurred(), "No error is expected when getting status") + fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Multi-Item Job Completion Test status of AW %v.\n", aw1.Status) + Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateCompleted), "Expecting a completed app wrapper status") appwrappers = append(appwrappers, aw) fmt.Fprintf(os.Stdout, "[e2e] MCAD Job Completion Test - Completed.\n") - }) It("MCAD GenericItem Without Status Test", func() { @@ -590,8 +583,9 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createAWGenericItemWithoutStatus(context, "aw-test-job-with-comp-44") err1 := waitAWPodsReady(context, aw) + appwrappers = append(appwrappers, aw) fmt.Fprintf(GinkgoWriter, "The error is: %v", err1) - Expect(err1).To(HaveOccurred()) + Expect(err1).To(HaveOccurred(), "Expecting for pods not to be ready for app wrapper: aw-test-job-with-comp-44") fmt.Fprintf(os.Stdout, "[e2e] MCAD GenericItem Without Status Test - Completed.\n") }) From f0e55b257ac1fad940e8069a248073840ff1a43d Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Wed, 5 Jul 2023 12:09:31 +0300 Subject: [PATCH 16/23] Error handling and messages updates. 
Fixed warning in go.mod file --- go.mod | 2 +- pkg/controller/metrics/adapter/adapter.go | 20 +- .../queuejob/queuejob_controller_ex.go | 913 +++++++++--------- .../genericresource/genericresource.go | 18 +- 4 files changed, 482 insertions(+), 471 deletions(-) diff --git a/go.mod b/go.mod index 6a19ad0b1..907cdc8b3 100644 --- a/go.mod +++ b/go.mod @@ -5,6 +5,7 @@ go 1.18 require ( github.com/eapache/go-resiliency v1.3.0 github.com/emicklei/go-restful v2.16.0+incompatible + github.com/gogo/protobuf v1.3.1 github.com/golang/protobuf v1.4.3 github.com/hashicorp/go-multierror v1.1.1 github.com/kubernetes-sigs/custom-metrics-apiserver v0.0.0-20210311094424-0ca2b1909cdc @@ -45,7 +46,6 @@ require ( github.com/go-openapi/jsonreference v0.19.5 // indirect github.com/go-openapi/spec v0.20.0 // indirect github.com/go-openapi/swag v0.19.12 // indirect - github.com/gogo/protobuf v1.3.1 // indirect github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect github.com/google/go-cmp v0.5.5 // indirect github.com/google/gofuzz v1.1.0 // indirect diff --git a/pkg/controller/metrics/adapter/adapter.go b/pkg/controller/metrics/adapter/adapter.go index ae3bede31..57b089726 100644 --- a/pkg/controller/metrics/adapter/adapter.go +++ b/pkg/controller/metrics/adapter/adapter.go @@ -32,39 +32,40 @@ package adapter import ( "flag" + "net/http" + "os" + "github.com/project-codeflare/multi-cluster-app-dispatcher/cmd/kar-controllers/app/options" openapinamer "k8s.io/apiserver/pkg/endpoints/openapi" genericapiserver "k8s.io/apiserver/pkg/server" - "net/http" - "os" "github.com/emicklei/go-restful" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/rest" "k8s.io/klog/v2" - adapterprov "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/metrics/adapter/provider" "github.com/kubernetes-sigs/custom-metrics-apiserver/pkg/apiserver" basecmd "github.com/kubernetes-sigs/custom-metrics-apiserver/pkg/cmd" "github.com/kubernetes-sigs/custom-metrics-apiserver/pkg/provider" generatedopenapi "github.com/kubernetes-sigs/custom-metrics-apiserver/test-adapter/generated/openapi" + adapterprov "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/metrics/adapter/provider" clusterstatecache "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/clusterstate/cache" ) // New returns a Cache implementation. 
-func New(serverOptions *options.ServerOption, config *rest.Config, clusterStateCache clusterstatecache.Cache) *MetricsAdpater { +func New(serverOptions *options.ServerOption, config *rest.Config, clusterStateCache clusterstatecache.Cache) *MetricsAdapter { return newMetricsAdpater(serverOptions, config, clusterStateCache) } -type MetricsAdpater struct { +type MetricsAdapter struct { basecmd.AdapterBase // Message is printed on succesful startup Message string } -func (a *MetricsAdpater) makeProviderOrDie(clusterStateCache clusterstatecache.Cache) (provider.MetricsProvider, *restful.WebService) { +func (a *MetricsAdapter) makeProviderOrDie(clusterStateCache clusterstatecache.Cache) (provider.MetricsProvider, *restful.WebService) { klog.Infof("[makeProviderOrDie] Entered makeProviderOrDie()") client, err := a.DynamicClient() if err != nil { @@ -79,7 +80,7 @@ func (a *MetricsAdpater) makeProviderOrDie(clusterStateCache clusterstatecache.C return adapterprov.NewFakeProvider(client, mapper, clusterStateCache) } -func covertServerOptionsToMetricsServerOptions(serverOptions *options.ServerOption) []string{ +func covertServerOptionsToMetricsServerOptions(serverOptions *options.ServerOption) []string { var portedArgs = make([]string, 0) if serverOptions == nil { return portedArgs @@ -91,11 +92,10 @@ func covertServerOptionsToMetricsServerOptions(serverOptions *options.ServerOpti } return portedArgs } -func newMetricsAdpater(serverOptions *options.ServerOption, config *rest.Config, clusterStateCache clusterstatecache.Cache) *MetricsAdpater { +func newMetricsAdpater(serverOptions *options.ServerOption, config *rest.Config, clusterStateCache clusterstatecache.Cache) *MetricsAdapter { klog.V(10).Infof("[newMetricsAdpater] Entered newMetricsAdpater()") - cmd := &MetricsAdpater{ - } + cmd := &MetricsAdapter{} cmd.OpenAPIConfig = genericapiserver.DefaultOpenAPIConfig(generatedopenapi.GetOpenAPIDefinitions, openapinamer.NewDefinitionNamer(apiserver.Scheme)) cmd.OpenAPIConfig.Info.Title = "MetricsAdpater" diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 37c7c883f..3308e91cf 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -44,6 +44,7 @@ import ( "github.com/eapache/go-resiliency/retrier" "github.com/gogo/protobuf/proto" + "github.com/hashicorp/go-multierror" qmutils "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/quotaplugins/util" "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/quota/quotaforestmanager" @@ -53,8 +54,8 @@ import ( "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/metrics/adapter" "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/quota" "k8s.io/apimachinery/pkg/api/equality" - apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -97,7 +98,6 @@ import ( clusterstateapi "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/clusterstate/api" clusterstatecache "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/clusterstate/cache" - apiErrors "k8s.io/apimachinery/pkg/api/errors" ) const ( @@ -108,9 +108,6 @@ const ( ControllerUIDLabel string = "controller-uid" ) -// controllerKind contains the schema.GroupVersionKind for this controller type. 
-var controllerKind = arbv1.SchemeGroupVersion.WithKind("AppWrapper") - // XController the AppWrapper Controller type type XController struct { config *rest.Config @@ -162,7 +159,7 @@ type XController struct { dispatchMap map[string]string // Metrics API Server - metricsAdapter *adapter.MetricsAdpater + metricsAdapter *adapter.MetricsAdapter // EventQueueforAgent agentEventQueue *cache.FIFO @@ -187,7 +184,7 @@ func NewJobAndClusterAgent(qjKey string, qaKey string) *JobAndClusterAgent { } } -// RegisterAllQueueJobResourceTypes - gegisters all resources +// RegisterAllQueueJobResourceTypes - registers all resources func RegisterAllQueueJobResourceTypes(regs *queuejobresources.RegisteredResources) { respod.Register(regs) resservice.Register(regs) @@ -442,12 +439,14 @@ func (qjm *XController) PreemptQueueJobs() { err := preemptRetrier.Run(func() error { var updateNewJob *arbv1.AppWrapper var message string - newjob, retryErr := qjm.getAppWrapper(aw.Namespace, aw.Name, "[PreemptQueueJobs] get fresh appwrapper") + newjob, retryErr := qjm.getAppWrapper(aw.Namespace, aw.Name, "[PreemptQueueJobs] get fresh app wrapper") if retryErr != nil { - if apiErrors.IsNotFound(retryErr) { + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[PreemptQueueJobs] App wrapper '%s/%s' was not found. ", aw.Namespace, aw.Name) return nil + } else { + klog.Warningf("[PreemptQueueJobs] failed in retrieving a fresh copy of the app wrapper '%s/%s', err=%v. Will try to preempt on the next run.", aw.Namespace, aw.Name, retryErr) } - klog.Warningf("[PreemptQueueJobs] failed in retriving a fresh copy of the appwrapper '%s/%s', %v. Will try to preempt on the next run", aw.Namespace, aw.Name, retryErr) } newjob.Status.CanRun = false newjob.Status.FilterIgnore = true // update QueueJobState only @@ -471,10 +470,12 @@ func (qjm *XController) PreemptQueueJobs() { updateNewJob = newjob.DeepCopy() if retryErr := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- DispatchDeadlineExceeded"); retryErr != nil { - if apiErrors.IsNotFound(retryErr) { + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[PreemptQueueJobs] App wrapper '%s/%s' was not found when updating status. ", aw.Namespace, aw.Name) return nil + } else if apierrors.IsConflict(retryErr) { + klog.Warningf("[PreemptQueueJobs] status update for '%s/%s' detected conflict. Retrying", aw.Namespace, aw.Name) } - klog.Warningf("[PreemptQueueJobs] status update for '%s/%s' failed, err=%v. Retrying", aw.Namespace, aw.Name, retryErr) return retryErr } //cannot use cleanup AW, since it puts AW back in running state @@ -542,27 +543,29 @@ func (qjm *XController) PreemptQueueJobs() { retryErr = qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- MinPodsNotRunning") if retryErr != nil { - if apiErrors.IsNotFound(retryErr) { + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[PreemptQueueJobs] App wrapper '%s/%s' was not found when updating status. ", aw.Namespace, aw.Name) return nil + } else if apierrors.IsConflict(retryErr) { + klog.Warningf("[PreemptQueueJobs] status update for '%s/%s' detected conflict. Retrying", aw.Namespace, aw.Name) } - klog.Warningf("[PreemptQueueJobs] update of spec and status for '%s/%s' failed, err=%v. 
Will try to preempt on the next run", aw.Namespace, aw.Name, retryErr) return retryErr } if cleanAppWrapper { - klog.V(4).Infof("[PreemptQueueJobs] Deleting AppWrapper %s/%s due to maximum number of requeuings exceeded.", aw.Name, aw.Namespace) + klog.V(4).Infof("[PreemptQueueJobs] Deleting AppWrapper %s/%s due to maximum number of re-queueing(s) exceeded.", aw.Name, aw.Namespace) go qjm.Cleanup(updateNewJob) } else { //Only back-off AWs that are in state running and not in state Failed if updateNewJob.Status.State != arbv1.AppWrapperStateFailed { - klog.V(4).Infof("[PreemptQueueJobs] Adding preempted AppWrapper %s/%s to backoff queue.", aw.Name, aw.Namespace) + klog.Infof("[PreemptQueueJobs] Adding preempted AppWrapper %s/%s to back off queue.", aw.Name, aw.Namespace) go qjm.backoff(updateNewJob, "PreemptionTriggered", string(message)) } } return nil }) if err != nil { - klog.Infof("[PreemptQueueJobs] failed preemption for app wrapper %s/%s, err= %v. Will attempt on the next run", aw.Name, aw.Namespace, err) + klog.Errorf("[PreemptQueueJobs] failed preemption for app wrapper %s/%s, err= %v. Will attempt on the next run", aw.Name, aw.Namespace, err) } } } @@ -578,26 +581,28 @@ func (qjm *XController) preemptAWJobs(preemptAWs []*arbv1.AppWrapper) { err := preemptRetrier.Run(func() error { apiCacheAWJob, retryErr := qjm.getAppWrapper(aw.Namespace, aw.Name, "[preemptAWJobs] get fresh app wrapper") if retryErr != nil { - if apiErrors.IsNotFound(retryErr) { + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[preemptAWJobs] App wrapper '%s/%s' was not found when getting a fresh copy. ", aw.Namespace, aw.Name) return nil } - - klog.Errorf("[preemptAWJobs] Failed to get AppWrapper to from API Cache %v/%v: %v", + klog.Errorf("[preemptAWJobs] Failed to get AppWrapper to from API Cache %s/%s: err = %v", aw.Namespace, aw.Name, retryErr) return retryErr } apiCacheAWJob.Status.CanRun = false if retryErr := qjm.updateStatusInEtcd(apiCacheAWJob, "preemptAWJobs - CanRun: false"); retryErr != nil { - klog.Warningf("[preemptAWJobs] Failed to update status of AppWrapper %v/%v: %v. Retrying", - apiCacheAWJob.Namespace, apiCacheAWJob.Name, retryErr) + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[preemptAWJobs] App wrapper '%s/%s' was not found when updating status. ", aw.Namespace, aw.Name) + return nil + } else if apierrors.IsConflict(retryErr) { + klog.Warningf("[preemptAWJobs] status update for '%s/%s' detected conflict. Retrying", aw.Namespace, aw.Name) + } return retryErr } return nil }) if err != nil { - if !apiErrors.IsNotFound(err) { - klog.Warningf("[preemptAWJobs] Failed to preempt app Wrapper '%s/%s'. App wrapper is not found", aw.Namespace, aw.Name) - } + klog.Errorf("[preemptAWJobs] Failed to preempt app Wrapper '%s/%s'. 
App wrapper is not found", aw.Namespace, aw.Name) } } } @@ -761,6 +766,7 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb status := qjm.genericresources.IsItemCompleted(&genericItem, caw.Namespace, caw.Name, name) if !status { + klog.Infof("[getAppWrapperCompletionStatus] Items not completed for appwrapper: %s in namespace: %s", caw.Name, caw.Namespace) //early termination because a required item is not completed return caw.Status.State } @@ -830,7 +836,7 @@ func (qjm *XController) getProposedPreemptions(requestingJob *arbv1.AppWrapper, //Sort keys of map priorityKeyValues := make([]float64, len(preemptableAWs)) i := 0 - for key, _ := range preemptableAWs { + for key := range preemptableAWs { priorityKeyValues[i] = key i++ } @@ -1128,7 +1134,7 @@ func (qjm *XController) ScheduleNext() { qjm.schedulingAW = qj qjm.schedulingMutex.Unlock() // ensure that current active appwrapper is reset at the end of this function, to prevent - // the appwrapper from being added in synch job + // the appwrapper from being added in syncjob defer qjm.schedulingAWAtomicSet(nil) scheduleNextRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) @@ -1140,7 +1146,8 @@ func (qjm *XController) ScheduleNext() { apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] -- get fresh copy after queue pop") if retryErr != nil { - if apiErrors.IsNotFound(retryErr) { + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[ScheduleNext] app wrapper '%s/%s' not found skiping dispatch", qj.Namespace, qj.Name) return nil } klog.Errorf("[ScheduleNext] Unable to get AW %s from API cache &aw=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, retryErr) @@ -1153,7 +1160,7 @@ func (qjm *XController) ScheduleNext() { apiCacheAWJob.DeepCopyInto(qj) } if qj.Status.CanRun { - klog.V(4).Infof("[ScheduleNext] AppWrapper '%s/%s' from prioirty queue is already scheduled. Ignoring request: Status=%v", qj.Namespace, qj.Name, qj.Status) + klog.V(4).Infof("[ScheduleNext] AppWrapper '%s/%s' from priority queue is already scheduled. Ignoring request: Status=%+v", qj.Namespace, qj.Name, qj.Status) return nil } @@ -1195,14 +1202,14 @@ func (qjm *XController) ScheduleNext() { klog.V(3).Infof("[ScheduleNext] activeQ.Pop_afterPriorityUpdate %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), qj, qj.ResourceVersion, qj.Status) apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] -- after dynamic priority pop") if retryErr != nil { - if apiErrors.IsNotFound(retryErr) { + if apierrors.IsNotFound(retryErr) { return nil } klog.Errorf("[ScheduleNext] failed to get a fresh copy of the app wrapper '%s/%s', err=%#v", qj.Namespace, qj.Name, retryErr) return err } if apiCacheAWJob.Status.CanRun { - klog.Infof("[ScheduleNext] AppWrapper job: %s from API is already scheduled. Ignoring request: Status=%+v\n", qj.Name, qj.Status) + klog.Infof("[ScheduleNext] AppWrapper job: %s from API is already scheduled. 
Ignoring request: Status=%+v", qj.Name, qj.Status) return nil } apiCacheAWJob.DeepCopyInto(qj) @@ -1213,17 +1220,22 @@ func (qjm *XController) ScheduleNext() { qjm.addOrUpdateCondition(qj, arbv1.AppWrapperCondHeadOfLine, v1.ConditionTrue, "FrontOfQueue.", "") qj.Status.FilterIgnore = true // update QueueJobState only - err = qjm.updateStatusInEtcd(qj, "ScheduleNext - setHOL") - if err != nil { - return err + retryErr = qjm.updateStatusInEtcd(qj, "ScheduleNext - setHOL") + if retryErr != nil { + if apierrors.IsConflict(err) { + klog.Warningf("[ScheduleNext] Conflict error detected when updating status in etcd for app wrapper '%s/%s, status = %+v. Retrying update.", qj.Namespace, qj.Name, qj.Status) + } else { + klog.Errorf("[ScheduleNext] Failed to updated status in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, err) + } + return retryErr } qjm.qjqueue.AddUnschedulableIfNotPresent(qj) // working on qj, avoid other threads putting it back to activeQ - klog.V(4).Infof("[ScheduleNext] after Pop qjqLength=%d qj %s Version=%s activeQ=%t Unsched=%t Status=%+v", qjm.qjqueue.Length(), qj.Name, qj.ResourceVersion, qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj.Status) + klog.V(4).Infof("[ScheduleNext] after Pop qjqLength=%d qj %s Version=%s activeQ=%t Unsched=%t Status=%v", qjm.qjqueue.Length(), qj.Name, qj.ResourceVersion, qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj.Status) if qjm.isDispatcher { - klog.Infof("[ScheduleNext] [Dispatcher Mode] Dispatch next appwrapper: '%s/%s Status=%v", qj.Namespace, qj.Name, qj.Status) + klog.Infof("[ScheduleNext] [Dispatcher Mode] Attempting to dispatch next appwrapper: '%s/%s Status=%v", qj.Namespace, qj.Name, qj.Status) } else { - klog.Infof("[ScheduleNext] [Agent Mode] Dispatch next appwrapper: '%s/%s' Status=%v", qj.Namespace, qj.Name, qj.Status) + klog.Infof("[ScheduleNext] [Agent Mode] Attempting to dispatch next appwrapper: '%s/%s' Status=%v", qj.Namespace, qj.Name, qj.Status) } dispatchFailedReason := "AppWrapperNotRunnable." @@ -1235,6 +1247,10 @@ func (qjm *XController) ScheduleNext() { // Add XQJ -> Agent Map apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Dispatcher Mode] get appwrapper") if retryErr != nil { + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[ScheduleNext] app wrapper '%s/%s' not found skiping dispatch", qj.Namespace, qj.Name) + return nil + } klog.Errorf("[ScheduleNext] [Dispatcher Mode] failed to retrieve the app wrapper '%s/%s', err=%#v", qj.Namespace, qj.Name, err) return err } @@ -1250,10 +1266,14 @@ func (qjm *XController) ScheduleNext() { klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] %s, %s: ScheduleNextBeforeEtcd", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) retryErr = qjm.updateStatusInEtcd(qj, "[ScheduleNext] [Dispatcher Mode] - setCanRun") if retryErr != nil { - klog.Errorf("[ScheduleNext] [Dispatcher Mode] failed to update status in etcd the app wrapper '%s/%s', err=%#v", qj.Namespace, qj.Name, err) + if apierrors.IsConflict(err) { + klog.Warningf("[ScheduleNext] [Dispatcher Mode] Conflict error detected when updating status in etcd for app wrapper '%s/%s, status = %+v. 
Retrying update.", qj.Namespace, qj.Name, qj.Status) + } else { + klog.Errorf("[ScheduleNext] [Dispatcher Mode] Failed to updated status in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, err) + } return retryErr } - if err := qjm.eventQueue.Add(qj); err != nil { // unsuccessful add to eventQueue, add back to activeQ + if err00 := qjm.eventQueue.Add(qj); err00 != nil { // unsuccessful add to eventQueue, add back to activeQ klog.Errorf("[ScheduleNext] [Dispatcher Mode] Fail to add %s to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) qjm.qjqueue.MoveToActiveQueueIfExists(qj) } else { // successful add to eventQueue, remove from qjqueue @@ -1279,7 +1299,7 @@ func (qjm *XController) ScheduleNext() { quotaFits := false // Try to forward to eventQueue for at most HeadOfLineHoldingTime for !forwarded { - klog.Infof("[ScheduleNext] [Agent Mode] Forwarding loop iteration: %d", fowardingLoopCount) + klog.V(4).Infof("[ScheduleNext] [Agent Mode] Forwarding loop iteration: %d", fowardingLoopCount) priorityindex := qj.Status.SystemPriority // Support for Non-Preemption if !qjm.serverOption.Preemption { @@ -1298,12 +1318,12 @@ func (qjm *XController) ScheduleNext() { if aggqj.LessEqual(resources) { unallocatedHistogramMap := qjm.cache.GetUnallocatedHistograms() if !qjm.nodeChecks(unallocatedHistogramMap, qj) { - klog.V(4).Infof("[ScheduleNext] [Agent Mode] Optimistic dispatch for AW '%s/%s' requesting aggregated resources %v histogram for point in-time fragmented resources are available in the cluster %s", + klog.Infof("[ScheduleNext] [Agent Mode] Optimistic dispatch for AW '%s/%s' requesting aggregated resources %v histogram for point in-time fragmented resources are available in the cluster %s", qj.Name, qj.Namespace, qjm.GetAggregatedResources(qj), proto.MarshalTextString(unallocatedHistogramMap["gpu"])) } // Now evaluate quota fits := true - klog.V(4).Infof("[ScheduleNext] [Agent Mode] available resourse successful check for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", + klog.Infof("[ScheduleNext] [Agent Mode] available resourse successful check for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v.", qj.Name, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) if qjm.serverOption.QuotaEnabled { if qjm.quotaManager != nil { @@ -1330,15 +1350,20 @@ func (qjm *XController) ScheduleNext() { if updateLabels { tempAW, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Agent Mode] update labels") if retryErr != nil { - if apiErrors.IsNotFound(retryErr) { + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[ScheduleNext] [Agent Mode] app wrapper '%s/%s' not found while trying to update labels, skiping dispatch.", qj.Namespace, qj.Name) return nil } - klog.Warningf("[ScheduleNext] [Agent Mode] Failed to added get fresh copy of the app wrapper '%s/%s' to update quota lables, err = %v", qj.Namespace, qj.Name, retryErr) return retryErr } tempAW.SetLabels(newLabels) updatedAW, retryErr := qjm.updateEtcd(tempAW, "ScheduleNext [Agent Mode] - setDefaultQuota") if retryErr != nil { + if apierrors.IsConflict(err) { + klog.Warningf("[ScheduleNext] [Agent mode] Conflict error detected when updating labels in etcd for app wrapper '%s/%s, status = %+v. 
Retrying update.", qj.Namespace, qj.Name, qj.Status) + } else { + klog.Errorf("[ScheduleNext] [Agent mode] Failed to update labels in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, err) + } return retryErr } klog.Infof("[ScheduleNext] [Agent Mode] Default quota added to AW '%s/%s'", qj.Namespace, qj.Name) @@ -1348,7 +1373,8 @@ func (qjm *XController) ScheduleNext() { var preemptAWs []*arbv1.AppWrapper quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(qj, aggqj, proposedPreemptions) if quotaFits { - klog.Infof("[ScheduleNext] HOL quota evaluation successful %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.Infof("[ScheduleNext] [Agent mode] quota evaluation successful for app wrapper '%s/%s' activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) // Set any jobs that are marked for preemption qjm.preemptAWJobs(preemptAWs) } else { // Not enough free quota to dispatch appwrapper @@ -1357,8 +1383,8 @@ func (qjm *XController) ScheduleNext() { dispatchFailedReason += " " dispatchFailedReason += msg } - klog.V(3).Infof("[ScheduleNext] [Agent Mode] HOL Blocking by %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s, due to quota limits", - qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, msg) + klog.Infof("[ScheduleNext] [Agent Mode] Blocking dispatch for app wrapper '%s/%s' due to quota limits, activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s", + qj.Namespace, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, msg) } fits = quotaFits } else { @@ -1368,7 +1394,7 @@ func (qjm *XController) ScheduleNext() { klog.Errorf("[ScheduleNext] [Agent Mode] Quota evaluation is enabled but not initialized. AppWrapper '%s/%s' does not have enough quota", qj.Namespace, qj.Name) } } else { - klog.V(4).Infof("[ScheduleNext] [Agent Mode] HOL quota evaluation not enabled for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, + klog.V(4).Infof("[ScheduleNext] [Agent Mode] quota evaluation not enabled for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } // If quota evalauation sucedeed or quota evaluation not enabled set the appwrapper to be dispatched @@ -1376,6 +1402,9 @@ func (qjm *XController) ScheduleNext() { // aw is ready to go! tempAW, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Agent Mode] -- ready to dispatch") if retryErr != nil { + if apierrors.IsNotFound(retryErr) { + return nil + } klog.Errorf("[ScheduleNext] [Agent Mode] Failed to get fresh copy of the app wrapper '%s/%s' to update status, err = %v", qj.Namespace, qj.Name, err) return retryErr } @@ -1393,30 +1422,33 @@ func (qjm *XController) ScheduleNext() { //Quota was allocated for this appwrapper, release it. 
qjm.quotaManager.Release(qj) } - if apiErrors.IsNotFound(retryErr) { - klog.Warningf("[ScheduleNext] app wrapper '%s/%s' not found skiping dispatch", qj.Namespace, qj.Name) + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[ScheduleNext] [Agent Mode] app wrapper '%s/%s' not found after status update, skiping dispatch.", qj.Namespace, qj.Name) return nil + } else if apierrors.IsConflict(err) { + klog.Warningf("[ScheduleNext] [Agent mode] Conflict error detected when updating status in etcd for app wrapper '%s/%s, status = %+v. Retrying update.", qj.Namespace, qj.Name, qj.Status) + } else { + klog.Errorf("[ScheduleNext] [Agent mode] Failed to update status in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, err) } return retryErr } tempAW.DeepCopyInto(qj) // add to eventQueue for dispatching to Etcd - if err = qjm.eventQueue.Add(qj); err != nil { // unsuccessful add to eventQueue, add back to activeQ - klog.Errorf("[ScheduleNext] [Agent Mode] Fail to add '%s/%s' to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Namespace, + if err00 := qjm.eventQueue.Add(qj); err00 != nil { // unsuccessful add to eventQueue, add back to activeQ + klog.Errorf("[ScheduleNext] [Agent Mode] Failed to add '%s/%s' to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Namespace, qj.Name, qj, qj.ResourceVersion, qj.Status, err) qjm.qjqueue.MoveToActiveQueueIfExists(qj) } else { // successful add to eventQueue, remove from qjqueue qjm.qjqueue.Delete(qj) forwarded = true - klog.V(4).Infof("[ScheduleNext] [Agent Mode]'%s/%s' Delay=%.6f seconds eventQueue.Add_afterHeadOfLine activeQ=%t, Unsched=%t &aw=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.IfExistActiveQ(qj), - qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.Infof("[ScheduleNext] [Agent Mode] Successfully dispatched app wrapper '%s/%s' activeQ=%t, Unsched=%t &aw=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } } //fits } else { // Not enough free resources to dispatch HOL dispatchFailedMessage = "Insufficient resources to dispatch AppWrapper." 
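Several hunks in ScheduleNext repeat the same shape: re-read the AppWrapper, mutate its status, write it back, treat NotFound as a clean exit, and let conflicts be retried by the classifier. A condensed sketch of that pattern, using the getAppWrapper and updateStatusInEtcd helpers named in the diff (the wrapper function itself is hypothetical):

// Hypothetical helper illustrating the update pattern used across ScheduleNext.
func (qjm *XController) updateStatusWithRetry(ns, name, caller string, mutate func(aw *arbv1.AppWrapper)) error {
	r := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{})
	return r.Run(func() error {
		aw, err := qjm.getAppWrapper(ns, name, caller)
		if err != nil {
			if apierrors.IsNotFound(err) {
				return nil // object was deleted; nothing left to update
			}
			return err
		}
		mutate(aw)
		// Conflicts are classified as retryable; other errors bubble up.
		return qjm.updateStatusInEtcd(aw, caller)
	})
}

A caller would then, for example, pass a mutate func that sets aw.Status.QueueJobState before the write, instead of repeating the fetch/update/classify boilerplate inline.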
- klog.V(4).Infof("[ScheduleNext] [Agent Mode] HOL Blocking by '%s/%s' for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), + klog.Infof("[ScheduleNext] [Agent Mode] Failed to dispatch app wrapper '%s/%s' due to insuficient resources, activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } // if the HeadOfLineHoldingTime option is not set it will break the loop @@ -1441,7 +1473,8 @@ func (qjm *XController) ScheduleNext() { fowardingLoopCount += 1 } if !forwarded { // start thread to backoff - klog.V(3).Infof("[ScheduleNext][Agent Mode] HOL backoff %s after waiting for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.Infof("[ScheduleNext] [Agent Mode] backing off app wrapper '%s/%s' after waiting for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) if qjm.quotaManager != nil && quotaFits { qjm.quotaManager.Release(qj) } @@ -1450,7 +1483,7 @@ func (qjm *XController) ScheduleNext() { } return nil }) - if apiErrors.IsNotFound(err) { + if apierrors.IsNotFound(err) { klog.Warningf("[ScheduleNext] app wrapper '%s/%s' not found skiping dispatch", qj.Namespace, qj.Name) return } @@ -1495,82 +1528,6 @@ func (cc *XController) updateStatusInEtcd(currentAppwrapper *arbv1.AppWrapper, c return nil } -func (qjm *XController) waitForPodCountUpdates(searchCond *arbv1.AppWrapperCondition) bool { - - // Continue reserviing resourses if dispatched condition not found - if searchCond == nil { - klog.V(10).Infof("[waitForPodCountUpdates] No condition not found.") - return true - } - - // Current time - now := metav1.NowMicro() - nowPtr := &now - - // Last time AW was dispatched - dispactedTS := searchCond.LastUpdateMicroTime - dispactedTSPtr := &dispactedTS - - // Error checking - if nowPtr.Before(dispactedTSPtr) { - klog.Errorf("[waitForPodCountUpdates] Current timestamp: %s is before condition latest update timestamp: %s", - now.String(), dispactedTS.String()) - return true - } - - // Duration since last time AW was dispatched - timeSinceDispatched := now.Sub(dispactedTS.Time) - - // Convert timeout default from milli-seconds to microseconds - timeoutMicroSeconds := qjm.serverOption.DispatchResourceReservationTimeout * 1000 - - // Don't reserve resources if timeout is hit - if timeSinceDispatched.Microseconds() > timeoutMicroSeconds { - klog.V(10).Infof("[waitForPodCountUpdates] Dispatch duration time %d microseconds has reached timeout value of %d microseconds", - timeSinceDispatched.Microseconds(), timeoutMicroSeconds) - - return false - } - - klog.V(10).Infof("[waitForPodCountUpdates] Dispatch duration time %d microseconds has not reached timeout value of %d microseconds", - timeSinceDispatched.Microseconds(), timeoutMicroSeconds) - return true -} - -func (qjm *XController) getLatestStatusConditionType(aw *arbv1.AppWrapper, condType arbv1.AppWrapperConditionType) *arbv1.AppWrapperCondition { - var latestConditionBasedOnType arbv1.AppWrapperCondition - if aw.Status.Conditions != nil && len(aw.Status.Conditions) > 0 { - // Find the latest matching condition based on 
type not related other condition fields - for _, condition := range aw.Status.Conditions { - // Matching condition? - if condition.Type == condType { - //First time match? - if (arbv1.AppWrapperCondition{} == latestConditionBasedOnType) { - latestConditionBasedOnType = condition - } else { - // Compare current condition to last match and get keep the later condition - currentCondLastUpdate := condition.LastUpdateMicroTime - currentCondLastUpdatePtr := ¤tCondLastUpdate - lastCondLastUpdate := latestConditionBasedOnType.LastUpdateMicroTime - lastCondLastUpdatePtr := &lastCondLastUpdate - if lastCondLastUpdatePtr.Before(currentCondLastUpdatePtr) { - latestConditionBasedOnType = condition - } - } - } // Condition type match check - } // Loop through conditions of AW - } // AW has conditions? - - // If no matching condition found return nil otherwise return matching latest condition - if (arbv1.AppWrapperCondition{} == latestConditionBasedOnType) { - klog.V(10).Infof("[getLatestStatusConditionType] No disptach condition found for AppWrapper=%s/%s.", - aw.Name, aw.Namespace) - return nil - } else { - return &latestConditionBasedOnType - } -} - func (qjm *XController) addOrUpdateCondition(aw *arbv1.AppWrapper, condType arbv1.AppWrapperConditionType, condStatus v1.ConditionStatus, condReason string, condMsg string) { var dupConditionExists bool = false @@ -1601,23 +1558,25 @@ func (qjm *XController) backoff(q *arbv1.AppWrapper, reason string, message stri etcUpdateRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) err := etcUpdateRetrier.Run(func() error { - apiCacheAWJob, err := qjm.getAppWrapper(q.Namespace, q.Name, "[backoff] - Rejoining") - if err != nil { - return err + apiCacheAWJob, retryErr := qjm.getAppWrapper(q.Namespace, q.Name, "[backoff] - Rejoining") + if retryErr != nil { + return retryErr } q.Status.DeepCopyInto(&apiCacheAWJob.Status) apiCacheAWJob.Status.QueueJobState = arbv1.AppWrapperCondBackoff apiCacheAWJob.Status.FilterIgnore = true // update QueueJobState only, no work needed // Update condition qjm.addOrUpdateCondition(apiCacheAWJob, arbv1.AppWrapperCondBackoff, v1.ConditionTrue, reason, message) - if err := qjm.updateStatusInEtcd(apiCacheAWJob, "[backoff] - Rejoining"); err != nil { - klog.Warningf("[backoff] Failed to updated AW status in etcd '%s/%s'. Continuing with possible stale object without updating conditions. Retrying.", apiCacheAWJob.Namespace, apiCacheAWJob.Name) - return err + if retryErr := qjm.updateStatusInEtcd(apiCacheAWJob, "[backoff] - Rejoining"); retryErr != nil { + if apierrors.IsConflict(retryErr) { + klog.Warningf("[backoff] Conflict on when upating AW status in etcd '%s/%s'. Retrying.", apiCacheAWJob.Namespace, apiCacheAWJob.Name) + } + return retryErr } return nil }) if err != nil { - klog.Errorf("[backoff] Failed to retrieve cached object for %s/%s. Continuing with possible stale object without updating conditions.", q.Namespace, q.Name) + klog.Errorf("[backoff] Failed to update status for %s/%s. Continuing with possible stale object without updating conditions. err=%s", q.Namespace, q.Name, err) } qjm.qjqueue.AddUnschedulableIfNotPresent(q) klog.V(3).Infof("[backoff] %s move to unschedulableQ before sleep for %d seconds. 
activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", q.Name, @@ -1671,7 +1630,6 @@ func (cc *XController) Run(stopCh chan struct{}) { go wait.Until(cc.agentEventQueueWorker, time.Second, stopCh) // Update Agent Worker } - // go wait.Until(cc.worker, time.Second, stopCh) go wait.Until(cc.worker, 0, stopCh) } @@ -1939,27 +1897,47 @@ func (cc *XController) worker() { if err != nil { klog.Warningf("[worker] Fail to process item from eventQueue, err %v. Attempting to re-enqueque...", err) if err00 := cc.enqueueIfNotPresent(item); err00 != nil { - klog.Errorf("[worker] Failed to re-enqueue item, err %v", err00) + klog.Errorf("[worker] Fatal error railed to re-enqueue item, err %v", err00) + } else { + klog.Warning("[worker] Item re-enqueued") } - klog.Warning("[worker] Item re-enqueued") return } } func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { + // validate that app wraper has not been marked for deletion by the infomer's delete handler + if qj.DeletionTimestamp != nil { + klog.Infof("[syncQueueJob] AW job=%s/%s set for deletion.", qj.Name, qj.Namespace) + // cleanup resources for running job, ignoring errors + if err00 := cc.Cleanup(qj); err00 != nil { + klog.Warningf("Failed to cleanup resources for app wrapper '%s/%s', err = %v", qj.Namespace, qj.Name, err00) + } + //empty finalizers and delete the queuejob again + if accessor, err00 := meta.Accessor(qj); err00 == nil { + accessor.SetFinalizers(nil) + } + // we delete the job from the queue if it is there, ignoring errors + cc.qjqueue.Delete(qj) + klog.Infof("[syncQueueJob] AW job=%s/%s deleted.", qj.Name, qj.Namespace) + return nil + } cacheAWJob, err := cc.getAppWrapper(qj.Namespace, qj.Name, "[syncQueueJob] get fresh appwrapper ") if err != nil { - // Implicit detection of deletion if apierrors.IsNotFound(err) { - klog.Warningf("[syncQueueJob] AppWrapper '%s/%s' not found in cache.", qj.Namespace, qj.Name) - cc.Cleanup(qj) + klog.Warningf("[syncQueueJob] AppWrapper '%s/%s' not found in cache and will be deleted", qj.Namespace, qj.Name) + // clean up app wrapper resources including quota + if err := cc.Cleanup(qj); err != nil { + klog.Errorf("Failed to delete resources associated with app wrapper: '%s/%s', err %v", qj.Namespace, qj.Name, err) + // return error so operation can be retried + return err + } cc.qjqueue.Delete(qj) return nil } - klog.Errorf("[syncQueueJob] Failed to get fresh copy of appwrapper AppWrapper '%s/%s', err %v", qj.Namespace, qj.Name, err) return err } - klog.V(10).Infof("[syncQueueJob] Cache AW %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) + klog.V(10).Infof("[syncQueueJob] Cache AW '%s/%s' &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, qj, qj.ResourceVersion, qj.Status) // make sure qj has the latest information if larger(cacheAWJob.ResourceVersion, qj.ResourceVersion) { @@ -1988,9 +1966,13 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunning, v1.ConditionTrue, "PodsRunning", "") awNew.Status.Conditions = append(awNew.Status.Conditions, cond) awNew.Status.FilterIgnore = true // Update AppWrapperCondRunning - err := cc.updateStatusInEtcd(awNew, "[syncQueueJob] setRunning") + err := cc.updateStatusInEtcd(awNew, "[syncQueueJob] Update pod counts") if err != nil { - klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: '%s/%s', err=%+v", qj.Namespace, qj.Name, err) + if apierrors.IsConflict(err) { + klog.Warningf("[syncQueueJob] Conflict detected when 
updating pod status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) + } else { + klog.Warningf("[syncQueueJob] Error updating pod status counts for AppWrapper job: '%s/%s', err=%+v.", qj.Namespace, qj.Name, err) + } return err } } @@ -2015,374 +1997,404 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { // pods according to what is specified in the job.Spec. // Does NOT modify . func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool) error { - var err error startTime := time.Now() defer func() { klog.V(10).Infof("[manageQueueJob] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status) }() - - if !cc.isDispatcher { // Agent Mode - - if qj.DeletionTimestamp != nil { - - klog.V(4).Infof("[manageQueueJob] AW job=%s/%s set for deletion.", qj.Name, qj.Namespace) - - // cleanup resources for running job - err = cc.Cleanup(qj) - if err != nil { - return err - } - //empty finalizers and delete the queuejob again - accessor, err := meta.Accessor(qj) - if err != nil { - return err - } - accessor.SetFinalizers(nil) - - // we delete the job from the queue if it is there - cc.qjqueue.Delete(qj) - - return nil - } - //Job is Complete only update pods if needed. - if qj.Status.State == arbv1.AppWrapperStateCompleted || qj.Status.State == arbv1.AppWrapperStateRunningHoldCompletion { - if podPhaseChanges { - // Only update etcd if AW status has changed. This can happen for periodic - // updates of pod phase counts done in caller of this function. - if err := cc.updateStatusInEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { - klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) + preemptRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) + preemptRetrier.SetJitter(0.05) + err := preemptRetrier.Run(func() error { + cacheAWJob, retryErr := cc.getAppWrapper(qj.Namespace, qj.Name, "[manageQueueJob] get fresh appwrapper ") + if retryErr != nil { + // Implicit detection of deletion + if apierrors.IsNotFound(retryErr) { + klog.Warningf("[manageQueueJob] AppWrapper '%s/%s' not found in cache and will be deleted.", qj.Namespace, qj.Name) + // clean up app wrapper resources including quota + if err := cc.Cleanup(qj); err != nil { + klog.Errorf("Failed to delete resources associated with app wrapper: '%s/%s', err %v", qj.Namespace, qj.Name, err) + // return error so operation can be retried from synch queue job return err } + cc.qjqueue.Delete(qj) + return nil } - return nil + klog.Errorf("[manageQueueJob] Failed to get fresh copy of appwrapper AppWrapper '%s/%s', err %v", qj.Namespace, qj.Name, retryErr) + return retryErr } + klog.V(10).Infof("[manageQueueJob] Cache AW '%s/%s' &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, qj, qj.ResourceVersion, qj.Status) - // First execution of qj to set Status.State = Enqueued - if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { - // if there are running resources for this job then delete them because the job was put in - // pending state... - - // If this the first time seeing this AW, no need to delete. 
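The syncQueueJob change above short-circuits AppWrappers that carry a DeletionTimestamp: dispatched resources are cleaned up, finalizers are cleared so the API server can finish the delete, and the job is dropped from the scheduling queue. A condensed, hypothetical helper showing that flow with the same calls used in the diff:

// Condensed sketch of the deletion path added to syncQueueJob (illustrative).
func (cc *XController) dropDeletedAppWrapper(qj *arbv1.AppWrapper) {
	// Best-effort cleanup of any resources dispatched for this AppWrapper.
	if err := cc.Cleanup(qj); err != nil {
		klog.Warningf("Failed to cleanup resources for app wrapper '%s/%s', err = %v", qj.Namespace, qj.Name, err)
	}
	// Clearing finalizers lets the API server complete the pending delete.
	if accessor, err := meta.Accessor(qj); err == nil {
		accessor.SetFinalizers(nil)
	}
	// Remove the job from the scheduling queue if it is still there.
	cc.qjqueue.Delete(qj)
}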
- stateLen := len(qj.Status.State) - if stateLen > 0 { - klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job '%s/%s' because it was preempted, status=%+v", qj.Namespace, qj.Name, qj.Status) - err = cc.Cleanup(qj) - klog.V(8).Infof("[manageQueueJob] Validation after deleting resources for AppWrapper Job '%s/%s' because it was be preempted, status=%+v", qj.Namespace, qj.Name, qj.Status) - if err != nil { - klog.Errorf("[manageQueueJob] Fail to delete resources for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err) - return err + // make sure qj has the latest information + if larger(cacheAWJob.ResourceVersion, qj.ResourceVersion) { + klog.V(5).Infof("[manageQueueJob] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) + klog.V(5).Infof("[manageQueueJob] '%s/%s' found more recent copy from cache &cacheAWJob=%p cacheAWJob=%+v", cacheAWJob.Namespace, cacheAWJob.Name, cacheAWJob, cacheAWJob) + cacheAWJob.DeepCopyInto(qj) + } + if !cc.isDispatcher { // Agent Mode + + //Job is Complete only update pods if needed. + if qj.Status.State == arbv1.AppWrapperStateCompleted || qj.Status.State == arbv1.AppWrapperStateRunningHoldCompletion { + if podPhaseChanges { + // Only update etcd if AW status has changed. This can happen for periodic + // updates of pod phase counts done in caller of this function. + if retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - podPhaseChanges"); retryErr != nil { + if apierrors.IsConflict(retryErr) { + klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) + } else { + klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) + } + return retryErr + } } - } - - qj.Status.State = arbv1.AppWrapperStateEnqueued - // add qj to qjqueue only when it is not in UnschedulableQ - if cc.qjqueue.IfExistUnschedulableQ(qj) { - klog.V(10).Infof("[manageQueueJob] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) return nil } - klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondQueueing, "AwaitingHeadOfLine") - if index < 0 { - qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") - qj.Status.Conditions = append(qj.Status.Conditions, cond) - } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") - qj.Status.Conditions[index] = *cond.DeepCopy() - } - - qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext - err := cc.updateStatusInEtcd(qj, "manageQueueJob - setQueueing") - if err != nil { - klog.Errorf("[manageQueueJob] Failed to updated etcd for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err) - return err - } - klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, 
qj.Status) - if err = cc.qjqueue.AddIfNotPresent(qj); err != nil { - klog.Errorf("manageQueueJob] Fail to add %s to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", - qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err) - cc.enqueue(qj) - } else { - klog.V(3).Infof("[worker-manageQJ] %s 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - } - return nil - } // End of first execution of qj to add to qjqueue for ScheduleNext - - //Handle recovery condition - if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { - // One more check to ensure AW is not the current active schedule object - if cc.IsActiveAppWrapper(qj.Name, qj.Namespace) { - cc.qjqueue.AddIfNotPresent(qj) - klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper '%s/%s' - added to active queue, Status=%+v", - qj.Namespace, qj.Name, qj.Status) - return nil - } - } + // First execution of qj to set Status.State = Enqueued + if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { + // if there are running resources for this job then delete them because the job was put in + // pending state... - // add qj to Etcd for dispatch - if qj.Status.CanRun && qj.Status.State != arbv1.AppWrapperStateActive && - qj.Status.State != arbv1.AppWrapperStateCompleted && - qj.Status.State != arbv1.AppWrapperStateRunningHoldCompletion { - //keep conditions until the appwrapper is re-dispatched - qj.Status.PendingPodConditions = nil - - qj.Status.State = arbv1.AppWrapperStateActive - // Bugfix to eliminate performance problem of overloading the event queue.} - - if qj.Spec.AggrResources.Items != nil { - for i := range qj.Spec.AggrResources.Items { - err := cc.refManager.AddTag(&qj.Spec.AggrResources.Items[i], func() string { - return strconv.Itoa(i) - }) - if err != nil { - return err + // If this the first time seeing this AW, no need to delete. + stateLen := len(qj.Status.State) + if stateLen > 0 { + klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job '%s/%s' because it was preempted, status=%v", qj.Namespace, qj.Name, qj.Status) + err00 := cc.Cleanup(qj) + if err00 != nil { + klog.Errorf("[manageQueueJob] Failed to delete resources for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err00) + return err00 } + klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job '%s/%s' because it was be preempted was sucessfull, status=%v", qj.Namespace, qj.Name, qj.Status) } - } - klog.V(3).Infof("[worker-manageQJ] %s 3Delay=%.6f seconds BeforeDispatchingToEtcd Version=%s Status=%+v", - qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.ResourceVersion, qj.Status) - dispatched := true - dispatchFailureReason := "ItemCreationFailure." 
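Both the queueing and dispatch hunks around here repeat an add-or-replace step for status conditions: look up an existing condition by type and reason, append one if missing, otherwise overwrite it in place. A small sketch of that pattern using getIndexOfMatchedCondition and GenerateAppWrapperCondition from the diff (the standalone helper is hypothetical):

// Hypothetical helper capturing the add-or-replace condition pattern.
func setCondition(aw *arbv1.AppWrapper, condType arbv1.AppWrapperConditionType, reason string) {
	cond := GenerateAppWrapperCondition(condType, v1.ConditionTrue, reason, "")
	if index := getIndexOfMatchedCondition(aw, condType, reason); index < 0 {
		aw.Status.Conditions = append(aw.Status.Conditions, cond)
	} else {
		aw.Status.Conditions[index] = *cond.DeepCopy()
	}
}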
- dispatchFailureMessage := "" - for _, ar := range qj.Spec.AggrResources.Items { - klog.V(10).Infof("[worker-manageQJ] before dispatch [%v].SyncQueueJob %s &qj=%p Version=%s Status=%+v", ar.Type, qj.Name, qj, qj.ResourceVersion, qj.Status) - // Call Resource Controller of ar.Type to issue REST call to Etcd for resource creation - err00 := cc.qjobResControls[ar.Type].SyncQueueJob(qj, &ar) - if err00 != nil { - dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) - klog.V(3).Infof("[worker-manageQJ] Error dispatching job=%s type=%v Status=%+v err=%+v", qj.Name, ar.Type, qj.Status, err00) - dispatched = false - break - } - } - // Handle generic resources - for _, ar := range qj.Spec.AggrResources.GenericItems { - klog.V(10).Infof("[worker-manageQJ] before dispatch Generic.SyncQueueJob %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) - _, err00 := cc.genericresources.SyncQueueJob(qj, &ar) - if err00 != nil { - dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) - klog.Errorf("[worker-manageQJ] Error dispatching job=%s Status=%+v err=%+v", qj.Name, qj.Status, err00) - dispatched = false + + qj.Status.State = arbv1.AppWrapperStateEnqueued + // add qj to qjqueue only when it is not in UnschedulableQ + if cc.qjqueue.IfExistUnschedulableQ(qj) { + klog.V(10).Infof("[manageQueueJob] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + return nil } - } - if dispatched { // set AppWrapperCondRunning if all resources are successfully dispatched - qj.Status.QueueJobState = arbv1.AppWrapperCondDispatched - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondDispatched, "AppWrapperRunnable") + klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondQueueing, "AwaitingHeadOfLine") if index < 0 { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondDispatched, v1.ConditionTrue, "AppWrapperRunnable", "") + qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") qj.Status.Conditions = append(qj.Status.Conditions, cond) } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondDispatched, v1.ConditionTrue, "AppWrapperRunnable", "") + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") qj.Status.Conditions[index] = *cond.DeepCopy() } - klog.V(3).Infof("[worker-manageQJ] %s 4Delay=%.6f seconds AllResourceDispatchedToEtcd Version=%s Status=%+v", - qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.ResourceVersion, qj.Status) - - } else { - qj.Status.State = arbv1.AppWrapperStateFailed - qj.Status.QueueJobState = arbv1.AppWrapperCondFailed - if !isLastConditionDuplicate(qj, arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) - qj.Status.Conditions = append(qj.Status.Conditions, cond) + qj.Status.FilterIgnore = true 
// Update Queueing status, add to qjqueue for ScheduleNext + retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - setQueueing") + if retryErr != nil { + if apierrors.IsConflict(retryErr) { + klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) + } else { + klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) + } + return retryErr } - cc.Cleanup(qj) - } - - qj.Status.FilterIgnore = true // update State & QueueJobState after dispatch - err := cc.updateStatusInEtcd(qj, "manageQueueJob - afterEtcdDispatching") - if err != nil { - klog.Errorf("[manageQueueJob] Error updating etc for AW job='%s/%s' Status=%v err=%v", qj.Namespace, qj.Name, qj.Status, err) - return err - } - - } else if qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateActive { - //set appwrapper status to Complete or RunningHoldCompletion - derivedAwStatus := cc.getAppWrapperCompletionStatus(qj) - - //Set Appwrapper state to complete if all items in Appwrapper - //are completed - if derivedAwStatus == arbv1.AppWrapperStateRunningHoldCompletion { - qj.Status.State = derivedAwStatus - var updateQj *arbv1.AppWrapper - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondRunningHoldCompletion, "SomeItemsCompleted") - if index < 0 { - qj.Status.QueueJobState = arbv1.AppWrapperCondRunningHoldCompletion - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunningHoldCompletion, v1.ConditionTrue, "SomeItemsCompleted", "") - qj.Status.Conditions = append(qj.Status.Conditions, cond) - qj.Status.FilterIgnore = true // Update AppWrapperCondRunningHoldCompletion - updateQj = qj.DeepCopy() + klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + if err00 := cc.qjqueue.AddIfNotPresent(qj); err00 != nil { + klog.Errorf("manageQueueJob] Fail to add '%s/%s' to activeQueue. 
Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", + qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err00) + cc.enqueue(qj) } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunningHoldCompletion, v1.ConditionTrue, "SomeItemsCompleted", "") - qj.Status.Conditions[index] = *cond.DeepCopy() - updateQj = qj.DeepCopy() + klog.V(3).Infof("[manageQueueJob] '%s/%s' 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } - if err := cc.updateStatusInEtcd(updateQj, "[syncQueueJob] setRunningHoldCompletion"); err != nil { - klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) - return err + return nil + } + // Handle recovery condition + if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { + // One more check to ensure AW is not the current active scheduled object + if cc.IsActiveAppWrapper(qj.Name, qj.Namespace) { + cc.qjqueue.AddIfNotPresent(qj) + klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper '%s/%s' - added to active queue, Status=%+v", + qj.Namespace, qj.Name, qj.Status) + return nil } } - //Set appwrapper status to complete - if derivedAwStatus == arbv1.AppWrapperStateCompleted { - qj.Status.State = derivedAwStatus - qj.Status.CanRun = false - var updateQj *arbv1.AppWrapper - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondCompleted, "PodsCompleted") - if index < 0 { - qj.Status.QueueJobState = arbv1.AppWrapperCondCompleted - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") - qj.Status.Conditions = append(qj.Status.Conditions, cond) - qj.Status.FilterIgnore = true // Update AppWrapperCondCompleted - updateQj = qj.DeepCopy() - } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") - qj.Status.Conditions[index] = *cond.DeepCopy() - updateQj = qj.DeepCopy() + + // add qj to Etcd for dispatch + if qj.Status.CanRun && qj.Status.State != arbv1.AppWrapperStateActive && + qj.Status.State != arbv1.AppWrapperStateCompleted && + qj.Status.State != arbv1.AppWrapperStateRunningHoldCompletion { + //keep conditions until the appwrapper is re-dispatched + qj.Status.PendingPodConditions = nil + + qj.Status.State = arbv1.AppWrapperStateActive + if qj.Spec.AggrResources.Items != nil { + for i := range qj.Spec.AggrResources.Items { + err00 := cc.refManager.AddTag(&qj.Spec.AggrResources.Items[i], func() string { + return strconv.Itoa(i) + }) + if err00 != nil { + klog.Warningf("Failed to add tag to aggregate resource item %s of app apprapper '%s/%s', err = %v", qj.Spec.AggrResources.Items[i].Name, qj.Namespace, qj.Name, err00) + } + } } - err := cc.updateStatusInEtcd(updateQj, "[syncQueueJob] setCompleted") - if err != nil { - if cc.quotaManager != nil { - cc.quotaManager.Release(updateQj) + klog.V(4).Infof("[manageQueueJob] %s 3Delay=%.6f seconds BeforeDispatchingToEtcd Version=%s Status=%+v", + qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.ResourceVersion, qj.Status) + dispatched := true + dispatchFailureReason := "ItemCreationFailure." 
+ dispatchFailureMessage := "" + for _, ar := range qj.Spec.AggrResources.Items { + klog.V(10).Infof("[manageQueueJob] before dispatch [%v].SyncQueueJob %s &qj=%p Version=%s Status=%+v", ar.Type, qj.Name, qj, qj.ResourceVersion, qj.Status) + // Call Resource Controller of ar.Type to issue REST call to Etcd for resource creation + err00 := cc.qjobResControls[ar.Type].SyncQueueJob(qj, &ar) + if err00 != nil { + if apierrors.IsInvalid(err00) { + klog.Warningf("[manageQueueJob] Invalid item sent for dispatching by app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, ar.Type, err00) + } else { + klog.Errorf("[manageQueueJob] Error dispatching item for app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, ar.Type, err00) + } + dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) + dispatched = false + break } - klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) - return err } - if cc.quotaManager != nil { - cc.quotaManager.Release(updateQj) + if dispatched { + // Handle generic resources + for _, ar := range qj.Spec.AggrResources.GenericItems { + klog.V(10).Infof("[manageQueueJob] before dispatch Generic.SyncQueueJob %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) + _, err00 := cc.genericresources.SyncQueueJob(qj, &ar) + if err00 != nil { + if apierrors.IsInvalid(err00) { + klog.Warningf("[manageQueueJob] Invalid generic item sent for dispatching by app wrapper='%s/%s' err=%v", qj.Namespace, qj.Name, err00) + } else { + klog.Errorf("[manageQueueJob] Error dispatching generic item for app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, err00) + } + dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) + klog.Errorf("[manageQueueJob] Error dispatching job=%s Status=%+v err=%+v", qj.Name, qj.Status, err00) + dispatched = false + } + } } - } - // Bugfix to eliminate performance problem of overloading the event queue. - } else if podPhaseChanges { // Continued bug fix - // Only update etcd if AW status has changed. This can happen for periodic - // updates of pod phase counts done in caller of this function. 
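Further down in this hunk, once all wrapped items report completion, the derived state is written back and any quota held by the AppWrapper is released whether or not the status write succeeds. A simplified, hypothetical condensation of that flow using the helpers named in the diff:

// Simplified sketch of the completion handling (illustrative only).
func (cc *XController) markCompleted(qj *arbv1.AppWrapper) error {
	if cc.getAppWrapperCompletionStatus(qj) != arbv1.AppWrapperStateCompleted {
		return nil
	}
	qj.Status.State = arbv1.AppWrapperStateCompleted
	qj.Status.CanRun = false
	updateQj := qj.DeepCopy()
	err := cc.updateStatusInEtcd(updateQj, "[syncQueueJob] setCompleted")
	if cc.quotaManager != nil {
		cc.quotaManager.Release(updateQj) // free quota for the completed wrapper
	}
	return err
}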
- if err := cc.updateStatusInEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { - klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) - return err - } - } - // Finish adding qj to Etcd for dispatch + if dispatched { // set AppWrapperCondRunning if all resources are successfully dispatched + qj.Status.QueueJobState = arbv1.AppWrapperCondDispatched + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondDispatched, "AppWrapperRunnable") + if index < 0 { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondDispatched, v1.ConditionTrue, "AppWrapperRunnable", "") + qj.Status.Conditions = append(qj.Status.Conditions, cond) + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondDispatched, v1.ConditionTrue, "AppWrapperRunnable", "") + qj.Status.Conditions[index] = *cond.DeepCopy() + } - } else { // Dispatcher Mode + klog.V(3).Infof("[manageQueueJob] %s 4Delay=%.6f seconds AllResourceDispatchedToEtcd Version=%s Status=%+v", + qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.ResourceVersion, qj.Status) - if qj.DeletionTimestamp != nil { - // cleanup resources for running job - err = cc.Cleanup(qj) - if err != nil { - return err - } - //empty finalizers and delete the queuejob again - accessor, err := meta.Accessor(qj) - if err != nil { - return err - } - accessor.SetFinalizers(nil) + } else { + qj.Status.State = arbv1.AppWrapperStateFailed + qj.Status.QueueJobState = arbv1.AppWrapperCondFailed + if !isLastConditionDuplicate(qj, arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) + qj.Status.Conditions = append(qj.Status.Conditions, cond) + } + // clean up app wrapper resources including quota + if err00 := cc.Cleanup(qj); err00 != nil { + klog.Errorf("Failed to delete resources associated with app wrapper: '%s/%s', err %v", qj.Namespace, qj.Name, err00) + // return error so operation can be retried + return err00 + } + cc.qjqueue.Delete(qj) + } - cc.qjqueue.Delete(qj) + qj.Status.FilterIgnore = true // update State & QueueJobState after dispatch + retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - afterEtcdDispatching") + if retryErr != nil { + if apierrors.IsConflict(retryErr) { + klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) + } else { + klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) + } + return retryErr + } - return nil - } + } else if qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateActive { + //set appwrapper status to Complete or RunningHoldCompletion + derivedAwStatus := cc.getAppWrapperCompletionStatus(qj) - if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { - // if there are running resources for this job then delete them because the job was put in - // pending state... - klog.V(3).Infof("[worker-manageQJ] Deleting AppWrapper resources because it will be preempted! 
%s", qj.Name) - err = cc.Cleanup(qj) - if err != nil { - return err + //Set Appwrapper state to complete if all items in Appwrapper + //are completed + if derivedAwStatus == arbv1.AppWrapperStateRunningHoldCompletion { + qj.Status.State = derivedAwStatus + var updateQj *arbv1.AppWrapper + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondRunningHoldCompletion, "SomeItemsCompleted") + if index < 0 { + qj.Status.QueueJobState = arbv1.AppWrapperCondRunningHoldCompletion + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunningHoldCompletion, v1.ConditionTrue, "SomeItemsCompleted", "") + qj.Status.Conditions = append(qj.Status.Conditions, cond) + qj.Status.FilterIgnore = true // Update AppWrapperCondRunningHoldCompletion + updateQj = qj.DeepCopy() + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunningHoldCompletion, v1.ConditionTrue, "SomeItemsCompleted", "") + qj.Status.Conditions[index] = *cond.DeepCopy() + updateQj = qj.DeepCopy() + } + if retryErr := cc.updateStatusInEtcd(updateQj, "[syncQueueJob] setRunningHoldCompletion"); retryErr != nil { + if apierrors.IsConflict(retryErr) { + klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) + } else { + klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) + } + return retryErr + } + } + //Set appwrapper status to complete + if derivedAwStatus == arbv1.AppWrapperStateCompleted { + qj.Status.State = derivedAwStatus + qj.Status.CanRun = false + var updateQj *arbv1.AppWrapper + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondCompleted, "PodsCompleted") + if index < 0 { + qj.Status.QueueJobState = arbv1.AppWrapperCondCompleted + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") + qj.Status.Conditions = append(qj.Status.Conditions, cond) + qj.Status.FilterIgnore = true // Update AppWrapperCondCompleted + updateQj = qj.DeepCopy() + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") + qj.Status.Conditions[index] = *cond.DeepCopy() + updateQj = qj.DeepCopy() + } + if retryErr := cc.updateStatusInEtcd(updateQj, "[syncQueueJob] setCompleted"); retryErr != nil { + if cc.quotaManager != nil { + cc.quotaManager.Release(updateQj) + } + if apierrors.IsConflict(retryErr) { + klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) + } else { + klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) + } + return retryErr + } + if cc.quotaManager != nil { + cc.quotaManager.Release(updateQj) + } + } + } else if podPhaseChanges { // Continued bug fix + // Only update etcd if AW status has changed. This can happen for periodic + // updates of pod phase counts done in caller of this function. + if retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - podPhaseChanges"); retryErr != nil { + if apierrors.IsConflict(retryErr) { + klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. 
Retrying", qj.Namespace, qj.Name) + } else { + klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) + } + return retryErr + } } + return nil + } else { // Dispatcher Mode - qj.Status.State = arbv1.AppWrapperStateEnqueued - if cc.qjqueue.IfExistUnschedulableQ(qj) { - klog.V(10).Infof("[worker-manageQJ] leaving %s to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - } else { - klog.V(10).Infof("[worker-manageQJ] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing - qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext - if err := cc.updateStatusInEtcd(qj, "manageQueueJob - setQueueing"); err != nil { - return err + if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { + // if there are running resources for this job then delete them because the job was put in + // pending state... + klog.V(3).Infof("[manageQueueJob] [Dispatcher] Deleting AppWrapper resources because it will be preempted! %s", qj.Name) + err00 := cc.Cleanup(qj) + if err00 != nil { + klog.Errorf("Failed to clean up resources for app wrapper '%s/%s', err =%v", qj.Namespace, qj.Name, err00) + return err00 } - if err = cc.qjqueue.AddIfNotPresent(qj); err != nil { - klog.Errorf("[worker-manageQJ] Fail to add %s to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", - qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err) - cc.enqueue(qj) + + qj.Status.State = arbv1.AppWrapperStateEnqueued + if cc.qjqueue.IfExistUnschedulableQ(qj) { + klog.V(10).Infof("[manageQueueJob] [Dispatcher] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } else { - klog.V(4).Infof("[worker-manageQJ] %s 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(10).Infof("[manageQueueJob] [Dispatcher] before add to activeQ '%s/%s' activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing + qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext + if retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - setQueueing"); retryErr != nil { + if apierrors.IsConflict(retryErr) { + klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. 
Retrying", qj.Namespace, qj.Name) + } else { + klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) + } + return retryErr + } + if err00 = cc.qjqueue.AddIfNotPresent(qj); err00 != nil { + klog.Errorf("[manageQueueJob] [Dispatcher] Fail to add '%s/%s' to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", + qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err00) + cc.enqueue(qj) + } else { + klog.V(4).Infof("[manageQueueJob] [Dispatcher] '%s/%s' 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + } } + return nil } - return nil - } - if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued { - cc.qjqueue.AddIfNotPresent(qj) - return nil - } - if qj.Status.CanRun && !qj.Status.IsDispatched { - if klog.V(10).Enabled() { - current_time := time.Now() - klog.V(10).Infof("[worker-manageQJ] XQJ %s has Overhead Before Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) - klog.V(10).Infof("[worker-manageQJ] %s, %s: WorkerBeforeDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued { + cc.qjqueue.AddIfNotPresent(qj) + return nil } + if qj.Status.CanRun && !qj.Status.IsDispatched { + if klog.V(10).Enabled() { + current_time := time.Now() + klog.V(10).Infof("[manageQueueJob] [Dispatcher] XQJ '%s/%s' has Overhead Before Dispatching: %s", qj.Namespace, qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[manageQueueJob] [Dispatcher] '%s/%s', %s: WorkerBeforeDispatch", qj.Namespace, qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + } - queuejobKey, _ := GetQueueJobKey(qj) - if cc.isDispatcher { + queuejobKey, _ := GetQueueJobKey(qj) if agentId, ok := cc.dispatchMap[queuejobKey]; ok { - klog.V(10).Infof("[worker-manageQJ] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId) + klog.V(10).Infof("[manageQueueJob] [Dispatcher] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId) cc.agentMap[agentId].CreateJob(qj) qj.Status.IsDispatched = true } else { - klog.Errorf("[worker-manageQJ] AppWrapper %s not found in dispatcher mapping.", qj.Name) + klog.Errorf("[manageQueueJob] [Dispatcher] AppWrapper %s not found in dispatcher mapping.", qj.Name) + } + if klog.V(10).Enabled() { + current_time := time.Now() + klog.V(10).Infof("[manageQueueJob] [Dispatcher] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[manageQueueJob] [Dispatcher] %s, %s: WorkerAfterDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + } + if retryErr := cc.updateStatusInEtcd(qj, "[manageQueueJob] [Dispatcher] -- set dispatched true"); retryErr != nil { + klog.Errorf("Failed to update status of AppWrapper %s/%s: err=%v", + qj.Namespace, qj.Name, retryErr) + return retryErr } } - if klog.V(10).Enabled() { - current_time := time.Now() - klog.V(10).Infof("[worker-manageQJ] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) - klog.V(10).Infof("[worker-manageQJ] %s, %s: WorkerAfterDispatch", qj.Name, 
time.Now().Sub(qj.CreationTimestamp.Time))
-			}
-			if err := cc.updateStatusInEtcd(qj, "[worker-manageQJ] -- set dispatched true"); err != nil {
-				klog.Errorf("Failed to update status of AppWrapper %s/%s: %s",
-					qj.Namespace, qj.Name, err)
-				return err
-			}
+		return nil
 	}
-
-	}
+	})
 	return err
 }
 
 // Cleanup function
 func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error {
-	klog.V(3).Infof("[Cleanup] begin AppWrapper '%s/%s' Version=%s Status=%v", appwrapper.Namespace, appwrapper.Name, appwrapper.ResourceVersion, appwrapper.Status)
-
+	klog.V(3).Infof("[Cleanup] begin AppWrapper '%s/%s' Version=%s", appwrapper.Namespace, appwrapper.Name, appwrapper.ResourceVersion)
+	var err *multierror.Error
 	if !cc.isDispatcher {
 		if appwrapper.Spec.AggrResources.Items != nil {
 			// we call clean-up for each controller
 			for _, ar := range appwrapper.Spec.AggrResources.Items {
 				err00 := cc.qjobResControls[ar.Type].Cleanup(appwrapper, &ar)
-				if err00 != nil {
-					klog.Errorf("[Cleanup] Error deleting item %s from app wrapper='%s/%s' Status=%+v err=%+v.",
-						ar.Type, appwrapper.Namespace, appwrapper.Name, appwrapper.Status, err00)
+				if err00 != nil && !apierrors.IsNotFound(err00) {
+					klog.Errorf("[Cleanup] Error deleting item %s from app wrapper='%s/%s' err=%v.",
+						ar.Type, appwrapper.Namespace, appwrapper.Name, err00)
+					err = multierror.Append(err, err00)
+					continue
 				}
+				klog.V(3).Infof("[Cleanup] Deleted item from app wrapper='%s/%s'",
+					appwrapper.Namespace, appwrapper.Name)
 			}
 		}
 		if appwrapper.Spec.AggrResources.GenericItems != nil {
 			for _, ar := range appwrapper.Spec.AggrResources.GenericItems {
 				genericResourceName, gvk, err00 := cc.genericresources.Cleanup(appwrapper, &ar)
-				if err00 != nil {
-					klog.Errorf("[Cleanup] Error deleting generic item %s, from app wrapper='%s/%s' Status=%+v err=%+v.",
-						genericResourceName, appwrapper.Namespace, appwrapper.Name, appwrapper.Status, err00)
+				if err00 != nil && !apierrors.IsNotFound(err00) {
+					klog.Errorf("[Cleanup] Error deleting generic item %s, from app wrapper='%s/%s' err=%v.",
+						genericResourceName, appwrapper.Namespace, appwrapper.Name, err00)
+					err = multierror.Append(err, err00)
+					continue
 				}
-				klog.Infof("[Cleanup] Delete generic item %s, GVK=%s.%s.%s from app wrapper=%s Status=%+v",
-					genericResourceName, gvk.Group, gvk.Version, gvk.Kind, appwrapper.Name, appwrapper.Status)
+				klog.V(3).Infof("[Cleanup] Deleted generic item %s, GVK=%s.%s.%s from app wrapper='%s/%s'",
+					genericResourceName, gvk.Group, gvk.Version, gvk.Kind, appwrapper.Namespace, appwrapper.Name)
 			}
 		}
 	} else {
-		// klog.Infof("[Dispatcher] Cleanup: State=%s\n", appwrapper.Status.State)
-		//if ! 
appwrapper.Status.CanRun && appwrapper.Status.IsDispatched { if appwrapper.Status.IsDispatched { queuejobKey, _ := GetQueueJobKey(appwrapper) if obj, ok := cc.dispatchMap[queuejobKey]; ok { @@ -2400,16 +2412,17 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { appwrapper.Status.Running = 0 appwrapper.Status.Succeeded = 0 appwrapper.Status.Failed = 0 - klog.V(10).Infof("[Cleanup] end AppWrapper '%s/%s' Version=%s Status=%+v", appwrapper.Namespace, appwrapper.Name, appwrapper.ResourceVersion, appwrapper.Status) + klog.V(3).Infof("[Cleanup] end AppWrapper '%s/%s' Version=%s", appwrapper.Namespace, appwrapper.Name, appwrapper.ResourceVersion) - return nil + return err.ErrorOrNil() } func (cc *XController) getAppWrapper(namespace string, name string, caller string) (*arbv1.AppWrapper, error) { klog.V(5).Infof("[getAppWrapper] getting a copy of '%s/%s' when called by '%s'.", namespace, name, caller) apiCacheAWJob, err := cc.queueJobLister.AppWrappers(namespace).Get(name) - // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level if err != nil { - klog.Errorf("[getAppWrapper] getting a copy of '%s/%s' failed, when called by '%s', err=%v", namespace, name, caller, err) + if !apierrors.IsNotFound(err) { + klog.Errorf("[getAppWrapper] getting a copy of '%s/%s' failed, when called by '%s', err=%v", namespace, name, caller, err) + } return nil, err } klog.V(5).Infof("[getAppWrapper] get a copy of '%s/%s' suceeded when called by '%s'", namespace, name, caller) @@ -2422,7 +2435,7 @@ type EtcdErrorClassifier struct { func (c *EtcdErrorClassifier) Classify(err error) retrier.Action { if err == nil { return retrier.Succeed - } else if apiErrors.IsConflict(err) { + } else if apierrors.IsConflict(err) { return retrier.Retry } else { return retrier.Fail diff --git a/pkg/controller/queuejobresources/genericresource/genericresource.go b/pkg/controller/queuejobresources/genericresource/genericresource.go index 4b2879305..38b212373 100644 --- a/pkg/controller/queuejobresources/genericresource/genericresource.go +++ b/pkg/controller/queuejobresources/genericresource/genericresource.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -173,12 +173,10 @@ func (gr *GenericResources) Cleanup(aw *arbv1.AppWrapper, awr *arbv1.AppWrapperG err = deleteObject(namespaced, namespace, newName, rsrc, dclient) if err != nil { - if errors.IsAlreadyExists(err) { - klog.V(4).Infof("%v\n", err.Error()) - } else { + if !errors.IsNotFound(err) { klog.Errorf("[Cleanup] Error deleting the object `%v`, the error is `%v`.", newName, errors.ReasonForError(err)) - return name, gvk, err } + return name, gvk, err } } else { klog.Warningf("[Cleanup] %s/%s not found using label selector: %s.\n", name, namespace, labelSelector) @@ -337,7 +335,7 @@ func (gr *GenericResources) SyncQueueJob(aw *arbv1.AppWrapper, awr *arbv1.AppWra return pods, nil } -//checks if object has pod template spec and add new labels +// checks if object has pod template spec and add new labels func addLabelsToPodTemplateField(unstruct *unstructured.Unstructured, labels map[string]string) (hasFields bool) { spec, isFound, _ := unstructured.NestedMap(unstruct.UnstructuredContent(), "spec") if !isFound { @@ -379,7 +377,7 @@ func addLabelsToPodTemplateField(unstruct *unstructured.Unstructured, labels map return isFound } -//checks if object has replicas and containers field +// checks if object has replicas and containers field func hasFields(obj runtime.RawExtension) (hasFields bool, replica float64, containers []v1.Container) { var unstruct unstructured.Unstructured unstruct.Object = make(map[string]interface{}) @@ -461,7 +459,7 @@ func createObject(namespaced bool, namespace string, name string, rsrc schema.Gr } } -func deleteObject(namespaced bool, namespace string, name string, rsrc schema.GroupVersionResource, dclient dynamic.Interface) (erro error) { +func deleteObject(namespaced bool, namespace string, name string, rsrc schema.GroupVersionResource, dclient dynamic.Interface) error { var err error backGround := metav1.DeletePropagationBackground delOptions := metav1.DeleteOptions{PropagationPolicy: &backGround} @@ -473,7 +471,7 @@ func deleteObject(namespaced bool, namespace string, name string, rsrc schema.Gr err = res.Delete(context.Background(), name, delOptions) } - if err != nil { + if err != nil && errors.IsNotFound(err) { klog.Errorf("[deleteObject] Error deleting the object `%v`, the error is `%v`.", name, errors.ReasonForError(err)) return err } else { @@ -597,7 +595,7 @@ func getContainerResources(container v1.Container, replicas float64) *clustersta return req } -//returns status of an item present in etcd +// returns status of an item present in etcd func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResource, namespace string, appwrapperName string, genericItemName string) (completed bool) { dd := gr.clients.Discovery() apigroups, err := restmapper.GetAPIGroupResources(dd) From 573647252b46834ec91e58790dea2e03e48e555a Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Wed, 5 Jul 2023 14:49:30 +0300 Subject: [PATCH 17/23] Addressed PR review comments Fixed failed test Log message improvements --- .../crd/bases/mcad.ibm.com_appwrappers.yaml | 4 +- .../crds/mcad.ibm.com_appwrappers.yaml | 4 +- pkg/apis/controller/v1beta1/appwrapper.go | 15 ++++--- pkg/controller/clusterstate/cache/cache.go | 2 +- .../queuejob/queuejob_controller_ex.go 
| 44 +++++++++++-------- .../genericresource/genericresource.go | 2 +- test/e2e/queue.go | 13 ++---- test/e2e/util.go | 4 +- 8 files changed, 45 insertions(+), 43 deletions(-) diff --git a/config/crd/bases/mcad.ibm.com_appwrappers.yaml b/config/crd/bases/mcad.ibm.com_appwrappers.yaml index 0da38b264..3280f8a14 100644 --- a/config/crd/bases/mcad.ibm.com_appwrappers.yaml +++ b/config/crd/bases/mcad.ibm.com_appwrappers.yaml @@ -833,12 +833,12 @@ spec: (is this different from the MinAvailable from JobStatus) format: int32 type: integer - number-of-requeueings: + numberOfRequeueings: description: Field to keep track of how many times a requeuing event has been triggered format: int32 type: integer default: 0 - requeueing-time-seconds: + requeueingTimeInSeconds: description: Field to keep track of total number of seconds spent in requeueing format: int32 type: integer diff --git a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml index 0da38b264..3280f8a14 100644 --- a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml +++ b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml @@ -833,12 +833,12 @@ spec: (is this different from the MinAvailable from JobStatus) format: int32 type: integer - number-of-requeueings: + numberOfRequeueings: description: Field to keep track of how many times a requeuing event has been triggered format: int32 type: integer default: 0 - requeueing-time-seconds: + requeueingTimeInSeconds: description: Field to keep track of total number of seconds spent in requeueing format: int32 type: integer diff --git a/pkg/apis/controller/v1beta1/appwrapper.go b/pkg/apis/controller/v1beta1/appwrapper.go index 6630ebda1..408404ebe 100644 --- a/pkg/apis/controller/v1beta1/appwrapper.go +++ b/pkg/apis/controller/v1beta1/appwrapper.go @@ -246,7 +246,7 @@ type AppWrapperStatus struct { // Microsecond level timestamp when controller first sees QueueJob (by Informer) ControllerFirstTimestamp metav1.MicroTime `json:"controllerfirsttimestamp,omitempty"` - // Microsecond level timestamp when controller first sets appwrapper in state Running + // Microsecond level timestamp when controller first dispatches appwrapper ControllerFirstDispatchTimestamp metav1.MicroTime `json:"controllerfirstdispatchtimestamp,omitempty"` // Tell Informer to ignore this update message (do not generate a controller event) @@ -264,17 +264,20 @@ type AppWrapperStatus struct { // Represents the latest available observations of pods under appwrapper PendingPodConditions []PendingPodSpec `json:"pendingpodconditions"` - //Resources consumed - + // Represents the number of cpu consumed by all pods belonging to an appwrapper. TotalCPU float64 `json:"totalcpu,omitempty"` + // Represents the amount of memory consumed by all pods belonging to an appwrapper. TotalMemory float64 `json:"totalmemory,omitempty"` + // Represents the total number of GPUs consumed by all pods belonging to an appwrapper. 
TotalGPU int64 `json:"totalgpu,omitempty"` - // Re-queueing state fields - RequeueingTimeInSeconds int `json:"requeueing-time-seconds,omitempty"` - NumberOfRequeueings int `json:"number-of-requeueings,omitempty"` + // Field to keep track of total number of seconds spent in requeueing + RequeueingTimeInSeconds int `json:"requeueingTimeInSeconds,omitempty"` + + // Field to keep track of how many times a requeuing event has been triggered + NumberOfRequeueings int `json:"numberOfRequeueings,omitempty"` } type AppWrapperState string diff --git a/pkg/controller/clusterstate/cache/cache.go b/pkg/controller/clusterstate/cache/cache.go index 9ae976e0e..e011438e1 100644 --- a/pkg/controller/clusterstate/cache/cache.go +++ b/pkg/controller/clusterstate/cache/cache.go @@ -334,7 +334,7 @@ func (sc *ClusterStateCache) updateState() error { } func (sc *ClusterStateCache) deleteJob(job *api.JobInfo) { - klog.V(4).Infof("[deleteJob] Attempting to delete Job <%v:%v/%v>", job.UID, job.Namespace, job.Name) + klog.V(10).Infof("[deleteJob] Attempting to delete Job <%v:%v/%v>", job.UID, job.Namespace, job.Name) time.AfterFunc(5*time.Second, func() { sc.deletedJobs.AddIfNotPresent(job) diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 3308e91cf..83f223f4c 100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -1222,10 +1222,10 @@ func (qjm *XController) ScheduleNext() { qj.Status.FilterIgnore = true // update QueueJobState only retryErr = qjm.updateStatusInEtcd(qj, "ScheduleNext - setHOL") if retryErr != nil { - if apierrors.IsConflict(err) { + if apierrors.IsConflict(retryErr) { klog.Warningf("[ScheduleNext] Conflict error detected when updating status in etcd for app wrapper '%s/%s, status = %+v. Retrying update.", qj.Namespace, qj.Name, qj.Status) } else { - klog.Errorf("[ScheduleNext] Failed to updated status in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, err) + klog.Errorf("[ScheduleNext] Failed to updated status in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, retryErr) } return retryErr } @@ -1908,7 +1908,7 @@ func (cc *XController) worker() { func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { // validate that app wraper has not been marked for deletion by the infomer's delete handler if qj.DeletionTimestamp != nil { - klog.Infof("[syncQueueJob] AW job=%s/%s set for deletion.", qj.Name, qj.Namespace) + klog.V(3).Infof("[syncQueueJob] AW job=%s/%s set for deletion.", qj.Namespace, qj.Name) // cleanup resources for running job, ignoring errors if err00 := cc.Cleanup(qj); err00 != nil { klog.Warningf("Failed to cleanup resources for app wrapper '%s/%s', err = %v", qj.Namespace, qj.Name, err00) @@ -1919,7 +1919,7 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { } // we delete the job from the queue if it is there, ignoring errors cc.qjqueue.Delete(qj) - klog.Infof("[syncQueueJob] AW job=%s/%s deleted.", qj.Name, qj.Namespace) + klog.V(3).Infof("[syncQueueJob] AW job=%s/%s deleted.", qj.Namespace, qj.Name) return nil } cacheAWJob, err := cc.getAppWrapper(qj.Namespace, qj.Name, "[syncQueueJob] get fresh appwrapper ") @@ -1962,19 +1962,25 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { // Update etcd conditions if AppWrapper Job has at least 1 running pod and transitioning from dispatched to running. 
if (awNew.Status.QueueJobState != arbv1.AppWrapperCondRunning) && (awNew.Status.Running > 0) { - awNew.Status.QueueJobState = arbv1.AppWrapperCondRunning - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunning, v1.ConditionTrue, "PodsRunning", "") - awNew.Status.Conditions = append(awNew.Status.Conditions, cond) - awNew.Status.FilterIgnore = true // Update AppWrapperCondRunning - err := cc.updateStatusInEtcd(awNew, "[syncQueueJob] Update pod counts") - if err != nil { - if apierrors.IsConflict(err) { - klog.Warningf("[syncQueueJob] Conflict detected when updating pod status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) - } else { - klog.Warningf("[syncQueueJob] Error updating pod status counts for AppWrapper job: '%s/%s', err=%+v.", qj.Namespace, qj.Name, err) + syncQueueJob := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) + syncQueueJob.SetJitter(0.05) + err := syncQueueJob.Run(func() error { + awNew.Status.QueueJobState = arbv1.AppWrapperCondRunning + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunning, v1.ConditionTrue, "PodsRunning", "") + awNew.Status.Conditions = append(awNew.Status.Conditions, cond) + awNew.Status.FilterIgnore = true // Update AppWrapperCondRunning + retryErr := cc.updateStatusInEtcd(awNew, "[syncQueueJob] Update pod counts") + if retryErr != nil { + if apierrors.IsConflict(retryErr) { + klog.Warningf("[syncQueueJob] Conflict detected when updating pod status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) + } else { + klog.Warningf("[syncQueueJob] Error updating pod status counts for AppWrapper job: '%s/%s', err=%+v.", qj.Namespace, qj.Name, retryErr) + } + return retryErr } - return err - } + return nil + }) + return err } //For debugging? 
@@ -2001,9 +2007,9 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool defer func() { klog.V(10).Infof("[manageQueueJob] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status) }() - preemptRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) - preemptRetrier.SetJitter(0.05) - err := preemptRetrier.Run(func() error { + manageQueueJobRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) + manageQueueJobRetrier.SetJitter(0.05) + err := manageQueueJobRetrier.Run(func() error { cacheAWJob, retryErr := cc.getAppWrapper(qj.Namespace, qj.Name, "[manageQueueJob] get fresh appwrapper ") if retryErr != nil { // Implicit detection of deletion diff --git a/pkg/controller/queuejobresources/genericresource/genericresource.go b/pkg/controller/queuejobresources/genericresource/genericresource.go index 38b212373..e45e380df 100644 --- a/pkg/controller/queuejobresources/genericresource/genericresource.go +++ b/pkg/controller/queuejobresources/genericresource/genericresource.go @@ -471,7 +471,7 @@ func deleteObject(namespaced bool, namespace string, name string, rsrc schema.Gr err = res.Delete(context.Background(), name, delOptions) } - if err != nil && errors.IsNotFound(err) { + if err != nil && !errors.IsNotFound(err) { klog.Errorf("[deleteObject] Error deleting the object `%v`, the error is `%v`.", name, errors.ReasonForError(err)) return err } else { diff --git a/test/e2e/queue.go b/test/e2e/queue.go index ad0dac768..ba5912b19 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -671,20 +671,13 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createGenericDeploymentAWWithMultipleItems(context, appendRandomString("aw-deployment-2-status")) time.Sleep(1 * time.Minute) err1 := waitAWPodsReady(context, aw) - Expect(err1).NotTo(HaveOccurred()) + Expect(err1).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-2-status") aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) - if err != nil { - fmt.Fprintf(GinkgoWriter, "Error getting status, %v", err) - } - pass := false + Expect(err).NotTo(HaveOccurred(), "Expecting to get app wrapper status") fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) - if aw1.Status.State == arbv1.AppWrapperStateRunningHoldCompletion { - pass = true - } - Expect(pass).To(BeTrue()) + Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateRunningHoldCompletion)) appwrappers = append(appwrappers, aw) fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RuningHoldCompletion Test - Completed.\n") - }) It("MCAD Service no RuningHoldCompletion or Complete Test", func() { diff --git a/test/e2e/util.go b/test/e2e/util.go index e26e940e0..ba4d2f8c6 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -2258,7 +2258,7 @@ func createGenericDeploymentAWWithMultipleItems(context *context, name string) * "app": "aw-deployment-2-status" }, "annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "aw-deployment-2-status" + "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" } }, "spec": { @@ -2299,7 +2299,7 @@ func createGenericDeploymentAWWithMultipleItems(context *context, name string) * "app": "aw-deployment-3-status" }, "annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "aw-deployment-3-status" + "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" } }, "spec": { From 
b47716bd2b220cc0858b2eb896afc66e85980fee Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Thu, 6 Jul 2023 21:28:37 +0300 Subject: [PATCH 18/23] Fixed failing test. --- test/e2e/queue.go | 10 +- test/e2e/util.go | 110 +++++++++-------- test/yaml/0007-fast-preemption-test.yaml | 146 ----------------------- test/yaml/0008-aw-default.yaml | 56 +++++++++ 4 files changed, 122 insertions(+), 200 deletions(-) delete mode 100644 test/yaml/0007-fast-preemption-test.yaml create mode 100644 test/yaml/0008-aw-default.yaml diff --git a/test/e2e/queue.go b/test/e2e/queue.go index ba5912b19..1b9b07dd5 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -661,14 +661,15 @@ var _ = Describe("AppWrapper E2E Test", func() { Expect(err).To(HaveOccurred(), "No pods for app wrapper `aw-deployment-2-426cpu` are expected.") }) - It("MCAD Deployment RuningHoldCompletion Test", func() { - fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RuningHoldCompletion Test - Started.\n") + It("MCAD Deployment RunningHoldCompletion Test", func() { + fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RunningHoldCompletion Test - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper appwrappersPtr := &appwrappers defer cleanupTestObjectsPtr(context, appwrappersPtr) aw := createGenericDeploymentAWWithMultipleItems(context, appendRandomString("aw-deployment-2-status")) + appwrappers = append(appwrappers, aw) time.Sleep(1 * time.Minute) err1 := waitAWPodsReady(context, aw) Expect(err1).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-2-status") @@ -676,12 +677,11 @@ var _ = Describe("AppWrapper E2E Test", func() { Expect(err).NotTo(HaveOccurred(), "Expecting to get app wrapper status") fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateRunningHoldCompletion)) - appwrappers = append(appwrappers, aw) fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RuningHoldCompletion Test - Completed.\n") }) - It("MCAD Service no RuningHoldCompletion or Complete Test", func() { - fmt.Fprintf(os.Stdout, "[e2e] MCAD Service no RuningHoldCompletion or Complete Test - Started.\n") + It("MCAD Service no RunningHoldCompletion or Complete Test", func() { + fmt.Fprintf(os.Stdout, "[e2e] MCAD Service no RunningHoldCompletion or Complete Test - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper appwrappersPtr := &appwrappers diff --git a/test/e2e/util.go b/test/e2e/util.go index ba4d2f8c6..30769b9a4 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -2237,25 +2237,78 @@ func createGenericServiceAWWithNoStatus(context *context, name string) *arbv1.Ap func createGenericDeploymentAWWithMultipleItems(context *context, name string) *arbv1.AppWrapper { rb := []byte(`{"apiVersion": "apps/v1", - "kind": "Deployment", + "kind": "Deployment", + "metadata": { + "name": "` + name + `-deployment-1", + "namespace": "test", + "labels": { + "app": "` + name + `-deployment-1" + } + }, + "spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "app": "` + name + `-deployment-1" + } + }, + "template": { + "metadata": { + "labels": { + "app": "` + name + `-deployment-1" + }, + "annotations": { + "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" + } + }, + "spec": { + "initContainers": [ + { + "name": "job-init-container", + "image": "k8s.gcr.io/busybox:latest", + "command": ["sleep", "200"], + "resources": { + "requests": { + "cpu": "500m" + } + } + } + ], + 
"containers": [ + { + "name": "` + name + `-deployment-1", + "image": "kicbase/echo-server:1.0", + "ports": [ + { + "containerPort": 80 + } + ] + } + ] + } + } + }} `) + rb1 := []byte(`{"apiVersion": "apps/v1", + "kind": "Deployment", "metadata": { - "name": "aw-deployment-2-status", + "name": "` + name + `-deployment-2", "namespace": "test", "labels": { - "app": "aw-deployment-2-status" + "app": "` + name + `-deployment-2" } }, + "spec": { "replicas": 1, "selector": { "matchLabels": { - "app": "aw-deployment-2-status" + "app": "` + name + `-deployment-2" } }, "template": { "metadata": { "labels": { - "app": "aw-deployment-2-status" + "app": "` + name + `-deployment-2" }, "annotations": { "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" @@ -2264,7 +2317,7 @@ func createGenericDeploymentAWWithMultipleItems(context *context, name string) * "spec": { "containers": [ { - "name": "aw-deployment-2-status", + "name": "` + name + `-deployment-2", "image": "kicbase/echo-server:1.0", "ports": [ { @@ -2277,47 +2330,6 @@ func createGenericDeploymentAWWithMultipleItems(context *context, name string) * } }} `) - rb1 := []byte(`{"apiVersion": "apps/v1", - "kind": "Deployment", -"metadata": { - "name": "aw-deployment-3-status", - "namespace": "test", - "labels": { - "app": "aw-deployment-3-status" - } -}, -"spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "app": "aw-deployment-3-status" - } - }, - "template": { - "metadata": { - "labels": { - "app": "aw-deployment-3-status" - }, - "annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" - } - }, - "spec": { - "containers": [ - { - "name": "aw-deployment-3-status", - "image": "kicbase/echo-server:1.0", - "ports": [ - { - "containerPort": 80 - } - ] - } - ] - } - } -}} `) - var schedSpecMin int = 1 aw := &arbv1.AppWrapper{ @@ -2333,7 +2345,7 @@ func createGenericDeploymentAWWithMultipleItems(context *context, name string) * GenericItems: []arbv1.AppWrapperGenericResource{ { ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", name, "aw-deployment-2-status"), + Name: fmt.Sprintf("%s-%s", name, "deployment-1"), Namespace: "test", }, DesiredAvailable: 1, @@ -2344,7 +2356,7 @@ func createGenericDeploymentAWWithMultipleItems(context *context, name string) * }, { ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", name, "aw-deployment-3-status"), + Name: fmt.Sprintf("%s-%s", name, "deployment-2"), Namespace: "test", }, DesiredAvailable: 1, diff --git a/test/yaml/0007-fast-preemption-test.yaml b/test/yaml/0007-fast-preemption-test.yaml deleted file mode 100644 index 67b106bd6..000000000 --- a/test/yaml/0007-fast-preemption-test.yaml +++ /dev/null @@ -1,146 +0,0 @@ -#apiVersion: mcad.ibm.com/v1beta1 -#kind: AppWrapper -#metadata: -# name: aw-deployment-2-550cpu-bbskat -# namespace: test -#spec: -# schedulingSpec: -# minAvailable: 2 -# requeuing: -# growthType: exponential -# maxNumRequeuings: 0 -# maxTimeInSeconds: 0 -# timeInSeconds: 60 -# resources: -# Items: -# - allocatedreplicas: 0 -# priorityslope: 0 -# replicas: 1 -# template: -# apiVersion: apps/v1 -# kind: Deployment -# metadata: -# labels: -# app: aw-deployment-2-550cpu-bbskat -# name: aw-deployment-2-550cpu-bbskat -# namespace: test -# spec: -# replicas: 2 -# selector: -# matchLabels: -# app: aw-deployment-2-550cpu-bbskat -# template: -# metadata: -# annotations: -# appwrapper.mcad.ibm.com/appwrapper-name: aw-deployment-2-550cpu-bbskat -# labels: -# app: aw-deployment-2-550cpu-bbskat -# spec: -# containers: -# - image: kicbase/echo-server:1.0 -# 
name: aw-deployment-2-550cpu-bbskat -# ports: -# - containerPort: 80 -# resources: -# requests: -# cpu: 550m ---- -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: aw-ff-deployment-1-850-cpu-ccdc29 - namespace: test -spec: - schedulingSpec: - minAvailable: 1 - requeuing: - growthType: exponential - maxNumRequeuings: 0 - maxTimeInSeconds: 0 - numRequeuings: 0 - timeInSeconds: 60 - resources: - custompodresources: - - replicas: 1 - requests: - cpu: 850m - generictemplate: - apiVersion: apps/v1 - kind: Deployment - metadata: - labels: - app: aw-ff-deployment-1-850-cpu-wkh8pa - name: aw-ff-deployment-1-850-cpu-wkh8pa - namespace: test - spec: - replicas: 1 - selector: - matchLabels: - app: aw-ff-deployment-1-850-cpu-wkh8pa - template: - metadata: - annotations: - appwrapper.mcad.ibm.com/appwrapper-name: aw-ff-deployment-1-850-cpu-wkh8pa - labels: - app: aw-ff-deployment-1-850-cpu-wkh8pa - spec: - containers: - - image: kicbase/echo-server:1.0 - name: aw-ff-deployment-1-850-cpu-wkh8pa - ports: - - containerPort: 80 - resources: - requests: - cpu: 850m - priority: 0 - priorityslope: 0 ---- -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: aw-ff-deployment-2-340-cpu-w6xkoe - namespace: test -spec: - schedulingSpec: - minAvailable: 2 - requeuing: - growthType: exponential - maxNumRequeuings: 0 - timeInSeconds: 60 - resources: - GenericItems: - - allocated: 0 - custompodresources: - - replicas: 2 - requests: - cpu: 340m - generictemplate: - apiVersion: apps/v1 - kind: Deployment - metadata: - labels: - app: aw-ff-deployment-2-340-cpu-w6xkoe - name: aw-ff-deployment-2-340-cpu-w6xkoe - namespace: test - spec: - replicas: 2 - selector: - matchLabels: - app: aw-ff-deployment-2-340-cpu-w6xkoe - template: - metadata: - annotations: - appwrapper.mcad.ibm.com/appwrapper-name: aw-ff-deployment-2-340-cpu-w6xkoe - labels: - app: aw-ff-deployment-2-340-cpu-w6xkoe - spec: - containers: - - image: kicbase/echo-server:1.0 - name: aw-ff-deployment-2-340-cpu-w6xkoe - ports: - - containerPort: 80 - resources: - requests: - cpu: 340m - priority: 0 - priorityslope: 0 diff --git a/test/yaml/0008-aw-default.yaml b/test/yaml/0008-aw-default.yaml new file mode 100644 index 000000000..94653d0c0 --- /dev/null +++ b/test/yaml/0008-aw-default.yaml @@ -0,0 +1,56 @@ +apiVersion: mcad.ibm.com/v1beta1 +kind: AppWrapper +metadata: + name: defaultaw-schd-spec-with-timeout-1 + namespace: default +spec: + # schedulingSpec: + # minAvailable: 1 + # requeuing: + # timeInSeconds: 120 + # growthType: "exponential" + priority: 9 + resources: + GenericItems: + - replicas: 1 + # completionstatus: Complete + custompodresources: + - replicas: 1 + requests: + cpu: 500m + memory: 512Mi + nvidia.com/gpu: 0 + limits: + cpu: 500m + memory: 512Mi + nvidia.com/gpu: 0 + generictemplate: + apiVersion: batch/v1 + kind: Job + metadata: + namespace: default + name: defaultaw-schd-spec-with-timeout-1 + # labels: + # appwrapper.mcad.ibm.com: defaultaw-schd-spec-with-timeout-1 + spec: + parallelism: 1 + completions: 1 + template: + metadata: + namespace: default + labels: + appwrapper.mcad.ibm.com: "defaultaw-schd-spec-with-timeout-1" + spec: + containers: + - name: defaultaw-schd-spec-with-timeout-1 + image: ubuntu:latest + command: [ "/bin/bash", "-c", "--" ] + args: [ "sleep 10" ] + resources: + requests: + memory: "512Mi" + cpu: "500m" + limits: + memory: "512Mi" + cpu: "500m" + restartPolicy: Never From 0ff4d325cdefdd036cf7219f35a74c2d2addd36e Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin 
<109964136+z103cb@users.noreply.github.com>
Date: Mon, 10 Jul 2023 19:08:22 +0300
Subject: [PATCH 19/23] Fixed bugs in the manage queue job
 e2e test updates.

---
 .../queuejob/queuejob_controller_ex.go        | 944 +++++++++---------
 test/e2e/queue.go                             |  12 +-
 2 files changed, 454 insertions(+), 502 deletions(-)

diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go
index 83f223f4c..fdc543b01 100644
--- a/pkg/controller/queuejob/queuejob_controller_ex.go
+++ b/pkg/controller/queuejob/queuejob_controller_ex.go
@@ -434,138 +434,117 @@ func (qjm *XController) PreemptQueueJobs() {
 			continue
 		}
 
-		preemptRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{})
-		preemptRetrier.SetJitter(0.05)
-		err := preemptRetrier.Run(func() error {
-			var updateNewJob *arbv1.AppWrapper
-			var message string
-			newjob, retryErr := qjm.getAppWrapper(aw.Namespace, aw.Name, "[PreemptQueueJobs] get fresh app wrapper")
-			if retryErr != nil {
-				if apierrors.IsNotFound(retryErr) {
-					klog.Warningf("[PreemptQueueJobs] App wrapper '%s/%s' was not found. ", aw.Namespace, aw.Name)
-					return nil
-				} else {
-					klog.Warningf("[PreemptQueueJobs] failed in retrieving a fresh copy of the app wrapper '%s/%s', err=%v. Will try to preempt on the next run.", aw.Namespace, aw.Name, retryErr)
-				}
-			}
-			newjob.Status.CanRun = false
-			newjob.Status.FilterIgnore = true // update QueueJobState only
-			cleanAppWrapper := false
-			//If dispatch deadline is exceeded no matter what the state of AW, kill the job and set status as Failed.
-			if (aw.Status.State == arbv1.AppWrapperStateActive) && (aw.Spec.SchedSpec.DispatchDuration.Limit > 0) {
-				if aw.Spec.SchedSpec.DispatchDuration.Overrun {
-					index := getIndexOfMatchedCondition(aw, arbv1.AppWrapperCondPreemptCandidate, "DispatchDeadlineExceeded")
-					if index < 0 {
-						message = fmt.Sprintf("Dispatch deadline exceeded. allowed to run for %v seconds", aw.Spec.SchedSpec.DispatchDuration.Limit)
-						cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "DispatchDeadlineExceeded", message)
-						newjob.Status.Conditions = append(newjob.Status.Conditions, cond)
-					} else {
-						cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "DispatchDeadlineExceeded", "")
-						newjob.Status.Conditions[index] = *cond.DeepCopy()
-					}
-					//should the AW state be set in this method??
-					newjob.Status.State = arbv1.AppWrapperStateFailed
-					newjob.Status.QueueJobState = arbv1.AppWrapperCondFailed
-					newjob.Status.Running = 0
-					updateNewJob = newjob.DeepCopy()
-
-					if retryErr := qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- DispatchDeadlineExceeded"); retryErr != nil {
-						if apierrors.IsNotFound(retryErr) {
-							klog.Warningf("[PreemptQueueJobs] App wrapper '%s/%s' was not found when updating status. ", aw.Namespace, aw.Name)
-							return nil
-						} else if apierrors.IsConflict(retryErr) {
-							klog.Warningf("[PreemptQueueJobs] status update for '%s/%s' detected conflict. 
Retrying", aw.Namespace, aw.Name) - } - return retryErr - } - //cannot use cleanup AW, since it puts AW back in running state - go qjm.qjqueue.AddUnschedulableIfNotPresent(updateNewJob) - - //Move to next AW - return nil - } - } - - if ((aw.Status.Running + aw.Status.Succeeded) < int32(aw.Spec.SchedSpec.MinAvailable)) && aw.Status.State == arbv1.AppWrapperStateActive { - index := getIndexOfMatchedCondition(aw, arbv1.AppWrapperCondPreemptCandidate, "MinPodsNotRunning") + var updateNewJob *arbv1.AppWrapper + var message string + newjob, err := qjm.getAppWrapper(aw.Namespace, aw.Name, "[PreemptQueueJobs] get fresh app wrapper") + if err != nil { + klog.Warningf("[PreemptQueueJobs] failed in retrieving a fresh copy of the app wrapper '%s/%s', err=%v. Will try to preempt on the next run.", aw.Namespace, aw.Name, err) + continue + } + newjob.Status.CanRun = false + newjob.Status.FilterIgnore = true // update QueueJobState only + cleanAppWrapper := false + //If dispatch deadline is exceeded no matter what the state of AW, kill the job and set status as Failed. + if (aw.Status.State == arbv1.AppWrapperStateActive) && (aw.Spec.SchedSpec.DispatchDuration.Limit > 0) { + if aw.Spec.SchedSpec.DispatchDuration.Overrun { + index := getIndexOfMatchedCondition(aw, arbv1.AppWrapperCondPreemptCandidate, "DispatchDeadlineExceeded") if index < 0 { - message = fmt.Sprintf("Insufficient number of Running and Completed pods, minimum=%d, running=%d, completed=%d.", aw.Spec.SchedSpec.MinAvailable, aw.Status.Running, aw.Status.Succeeded) - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "MinPodsNotRunning", message) + message = fmt.Sprintf("Dispatch deadline exceeded. allowed to run for %v seconds", aw.Spec.SchedSpec.DispatchDuration.Limit) + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "DispatchDeadlineExceeded", message) newjob.Status.Conditions = append(newjob.Status.Conditions, cond) } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "MinPodsNotRunning", "") + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "DispatchDeadlineExceeded", "") newjob.Status.Conditions[index] = *cond.DeepCopy() } + //should the AW state be set in this method?? 
+ newjob.Status.State = arbv1.AppWrapperStateFailed + newjob.Status.QueueJobState = arbv1.AppWrapperCondFailed + newjob.Status.Running = 0 + updateNewJob = newjob.DeepCopy() - if aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds == 0 { - aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds = aw.Spec.SchedSpec.Requeuing.TimeInSeconds - } - if aw.Spec.SchedSpec.Requeuing.GrowthType == "exponential" { - if newjob.Status.RequeueingTimeInSeconds == 0 { - newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.TimeInSeconds - } else { - newjob.Status.RequeueingTimeInSeconds += newjob.Status.RequeueingTimeInSeconds - } - } else if aw.Spec.SchedSpec.Requeuing.GrowthType == "linear" { - newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds - } - - if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds > 0 { - if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds <= newjob.Status.RequeueingTimeInSeconds { - newjob.Status.RequeueingTimeInSeconds = aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds - } + err := qjm.updateStatusInEtcdWithRetry(updateNewJob, "PreemptQueueJobs - CanRun: false -- DispatchDeadlineExceeded") + if err != nil { + klog.Warningf("[PreemptQueueJobs] status update CanRun: false -- DispatchDeadlineExceeded for '%s/%s' failed", aw.Namespace, aw.Name) + continue } + //cannot use cleanup AW, since it puts AW back in running state + go qjm.qjqueue.AddUnschedulableIfNotPresent(updateNewJob) - if newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings > 0 && newjob.Spec.SchedSpec.Requeuing.NumRequeuings == newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings { - newjob.Status.State = arbv1.AppWrapperStateDeleted - cleanAppWrapper = true - } else { - newjob.Status.NumberOfRequeueings += 1 - } + //Move to next AW + continue + } + } - updateNewJob = newjob.DeepCopy() + if ((aw.Status.Running + aw.Status.Succeeded) < int32(aw.Spec.SchedSpec.MinAvailable)) && aw.Status.State == arbv1.AppWrapperStateActive { + index := getIndexOfMatchedCondition(aw, arbv1.AppWrapperCondPreemptCandidate, "MinPodsNotRunning") + if index < 0 { + message = fmt.Sprintf("Insufficient number of Running and Completed pods, minimum=%d, running=%d, completed=%d.", aw.Spec.SchedSpec.MinAvailable, aw.Status.Running, aw.Status.Succeeded) + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "MinPodsNotRunning", message) + newjob.Status.Conditions = append(newjob.Status.Conditions, cond) } else { - //If pods failed scheduling generate new preempt condition - message = fmt.Sprintf("Pods failed scheduling failed=%v, running=%v.", len(aw.Status.PendingPodConditions), aw.Status.Running) - index := getIndexOfMatchedCondition(newjob, arbv1.AppWrapperCondPreemptCandidate, "PodsFailedScheduling") - //ignore co-scheduler failed scheduling events. This is a temp - //work around until co-scheduler version 0.22.X perf issues are resolved. 
- if index < 0 { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "PodsFailedScheduling", message) - newjob.Status.Conditions = append(newjob.Status.Conditions, cond) + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "MinPodsNotRunning", "") + newjob.Status.Conditions[index] = *cond.DeepCopy() + } + + if aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds == 0 { + aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds = aw.Spec.SchedSpec.Requeuing.TimeInSeconds + } + if aw.Spec.SchedSpec.Requeuing.GrowthType == "exponential" { + if newjob.Status.RequeueingTimeInSeconds == 0 { + newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.TimeInSeconds } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "PodsFailedScheduling", message) - newjob.Status.Conditions[index] = *cond.DeepCopy() + newjob.Status.RequeueingTimeInSeconds += newjob.Status.RequeueingTimeInSeconds } - - updateNewJob = newjob.DeepCopy() + } else if aw.Spec.SchedSpec.Requeuing.GrowthType == "linear" { + newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds } - retryErr = qjm.updateStatusInEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false -- MinPodsNotRunning") - if retryErr != nil { - if apierrors.IsNotFound(retryErr) { - klog.Warningf("[PreemptQueueJobs] App wrapper '%s/%s' was not found when updating status. ", aw.Namespace, aw.Name) - return nil - } else if apierrors.IsConflict(retryErr) { - klog.Warningf("[PreemptQueueJobs] status update for '%s/%s' detected conflict. Retrying", aw.Namespace, aw.Name) + if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds > 0 { + if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds <= newjob.Status.RequeueingTimeInSeconds { + newjob.Status.RequeueingTimeInSeconds = aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds } - return retryErr } - if cleanAppWrapper { - klog.V(4).Infof("[PreemptQueueJobs] Deleting AppWrapper %s/%s due to maximum number of re-queueing(s) exceeded.", aw.Name, aw.Namespace) - go qjm.Cleanup(updateNewJob) + if newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings > 0 && newjob.Spec.SchedSpec.Requeuing.NumRequeuings == newjob.Spec.SchedSpec.Requeuing.MaxNumRequeuings { + newjob.Status.State = arbv1.AppWrapperStateDeleted + cleanAppWrapper = true } else { - //Only back-off AWs that are in state running and not in state Failed - if updateNewJob.Status.State != arbv1.AppWrapperStateFailed { - klog.Infof("[PreemptQueueJobs] Adding preempted AppWrapper %s/%s to back off queue.", aw.Name, aw.Namespace) - go qjm.backoff(updateNewJob, "PreemptionTriggered", string(message)) - } + newjob.Status.NumberOfRequeueings += 1 } - return nil - }) + + updateNewJob = newjob.DeepCopy() + } else { + //If pods failed scheduling generate new preempt condition + message = fmt.Sprintf("Pods failed scheduling failed=%v, running=%v.", len(aw.Status.PendingPodConditions), aw.Status.Running) + index := getIndexOfMatchedCondition(newjob, arbv1.AppWrapperCondPreemptCandidate, "PodsFailedScheduling") + //ignore co-scheduler failed scheduling events. This is a temp + //work around until co-scheduler version 0.22.X perf issues are resolved. 
+ if index < 0 { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "PodsFailedScheduling", message) + newjob.Status.Conditions = append(newjob.Status.Conditions, cond) + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondPreemptCandidate, v1.ConditionTrue, "PodsFailedScheduling", message) + newjob.Status.Conditions[index] = *cond.DeepCopy() + } + + updateNewJob = newjob.DeepCopy() + } + + err = qjm.updateStatusInEtcdWithRetry(updateNewJob, "PreemptQueueJobs - CanRun: false -- MinPodsNotRunning") if err != nil { - klog.Errorf("[PreemptQueueJobs] failed preemption for app wrapper %s/%s, err= %v. Will attempt on the next run", aw.Name, aw.Namespace, err) + klog.Warningf("[PreemptQueueJobs] status update for '%s/%s' failed, skipping app wrapper err =%v", aw.Namespace, aw.Name, err) + continue + } + + if cleanAppWrapper { + klog.V(4).Infof("[PreemptQueueJobs] Deleting AppWrapper %s/%s due to maximum number of re-queueing(s) exceeded.", aw.Name, aw.Namespace) + go qjm.Cleanup(updateNewJob) + } else { + //Only back-off AWs that are in state running and not in state Failed + if updateNewJob.Status.State != arbv1.AppWrapperStateFailed { + klog.Infof("[PreemptQueueJobs] Adding preempted AppWrapper %s/%s to back off queue.", aw.Name, aw.Namespace) + go qjm.backoff(updateNewJob, "PreemptionTriggered", string(message)) + } } } } @@ -576,33 +555,24 @@ func (qjm *XController) preemptAWJobs(preemptAWs []*arbv1.AppWrapper) { } for _, aw := range preemptAWs { - preemptRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) - preemptRetrier.SetJitter(0.05) - err := preemptRetrier.Run(func() error { - apiCacheAWJob, retryErr := qjm.getAppWrapper(aw.Namespace, aw.Name, "[preemptAWJobs] get fresh app wrapper") - if retryErr != nil { - if apierrors.IsNotFound(retryErr) { - klog.Warningf("[preemptAWJobs] App wrapper '%s/%s' was not found when getting a fresh copy. ", aw.Namespace, aw.Name) - return nil - } - klog.Errorf("[preemptAWJobs] Failed to get AppWrapper to from API Cache %s/%s: err = %v", - aw.Namespace, aw.Name, retryErr) - return retryErr - } - apiCacheAWJob.Status.CanRun = false - if retryErr := qjm.updateStatusInEtcd(apiCacheAWJob, "preemptAWJobs - CanRun: false"); retryErr != nil { - if apierrors.IsNotFound(retryErr) { - klog.Warningf("[preemptAWJobs] App wrapper '%s/%s' was not found when updating status. ", aw.Namespace, aw.Name) - return nil - } else if apierrors.IsConflict(retryErr) { - klog.Warningf("[preemptAWJobs] status update for '%s/%s' detected conflict. Retrying", aw.Namespace, aw.Name) - } - return retryErr + apiCacheAWJob, err := qjm.getAppWrapper(aw.Namespace, aw.Name, "[preemptAWJobs] get fresh app wrapper") + if err != nil { + if apierrors.IsNotFound(err) { + klog.Warningf("[preemptAWJobs] App wrapper '%s/%s' was not found when getting a fresh copy. ", aw.Namespace, aw.Name) + continue } - return nil - }) + klog.Errorf("[preemptAWJobs] Failed to get AppWrapper to from API Cache %s/%s: err = %v", + aw.Namespace, aw.Name, err) + continue + } + apiCacheAWJob.Status.CanRun = false + err = qjm.updateStatusInEtcdWithRetry(apiCacheAWJob, "preemptAWJobs - CanRun: false") if err != nil { - klog.Errorf("[preemptAWJobs] Failed to preempt app Wrapper '%s/%s'. App wrapper is not found", aw.Namespace, aw.Name) + if apierrors.IsNotFound(err) { + klog.Warningf("[preemptAWJobs] App wrapper '%s/%s' was not found when updating status. 
", aw.Namespace, aw.Name) + continue + } + klog.Warningf("[preemptAWJobs] status update for '%s/%s' failed, err=%v", aw.Namespace, aw.Name, err) } } } @@ -742,7 +712,7 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb // Get all pods and related resources countCompletionRequired := 0 - for _, genericItem := range caw.Spec.AggrResources.GenericItems { + for i, genericItem := range caw.Spec.AggrResources.GenericItems { if len(genericItem.CompletionStatus) > 0 { objectName := genericItem.GenericTemplate var unstruct unstructured.Unstructured @@ -760,13 +730,12 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb } } if len(name) == 0 { - klog.Warningf("[getAppWrapperCompletionStatus] object name not present for appwrapper: %s in namespace: %s", caw.Name, caw.Namespace) + klog.Warningf("[getAppWrapperCompletionStatus] object name not present for appwrapper: '%s/%s", caw.Namespace, caw.Name) } - klog.Infof("[getAppWrapperCompletionStatus] Checking items completed for appwrapper: %s in namespace: %s", caw.Name, caw.Namespace) - + klog.V(4).Infof("[getAppWrapperCompletionStatus] Checking if item %d named %s completed for appwrapper: '%s/%s'...", i+1, name, caw.Namespace, caw.Name) status := qjm.genericresources.IsItemCompleted(&genericItem, caw.Namespace, caw.Name, name) if !status { - klog.Infof("[getAppWrapperCompletionStatus] Items not completed for appwrapper: %s in namespace: %s", caw.Name, caw.Namespace) + klog.V(4).Infof("[getAppWrapperCompletionStatus] Item %d named %s not completed for appwrapper: '%s/%s'", i+1, name, caw.Namespace, caw.Name) //early termination because a required item is not completed return caw.Status.State } @@ -776,7 +745,7 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb } } - klog.V(4).Infof("[getAppWrapperCompletionStatus] '%s/%s' countCompletionRequired %v, podsRunning %v, podsPending %v", caw.Namespace, caw.Name, countCompletionRequired, caw.Status.Running, caw.Status.Pending) + klog.V(4).Infof("[getAppWrapperCompletionStatus] App wrapper '%s/%s' countCompletionRequired %d, podsRunning %d, podsPending %d", caw.Namespace, caw.Name, countCompletionRequired, caw.Status.Running, caw.Status.Pending) //Set new status only when completion required flag is present in genericitems array if countCompletionRequired > 0 { @@ -1124,13 +1093,12 @@ func (qjm *XController) ScheduleNext() { // check if we have enough compute resources for it // if we have enough compute resources then we set the AllocatedReplicas to the total // amount of resources asked by the job - qjm.schedulingMutex.Lock() qj, err := qjm.qjqueue.Pop() if err != nil { klog.Errorf("[ScheduleNext] Cannot pop QueueJob from qjqueue! err=%#v", err) - qjm.schedulingMutex.Unlock() return // Try to pop qjqueue again } + qjm.schedulingMutex.Lock() qjm.schedulingAW = qj qjm.schedulingMutex.Unlock() // ensure that current active appwrapper is reset at the end of this function, to prevent @@ -1399,6 +1367,7 @@ func (qjm *XController) ScheduleNext() { } // If quota evalauation sucedeed or quota evaluation not enabled set the appwrapper to be dispatched if fits { + // aw is ready to go! 
 			tempAW, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Agent Mode] -- ready to dispatch")
 			if retryErr != nil {
@@ -1425,9 +1394,9 @@ func (qjm *XController) ScheduleNext() {
 				if apierrors.IsNotFound(retryErr) {
 					klog.Warningf("[ScheduleNext] [Agent Mode] app wrapper '%s/%s' not found after status update, skiping dispatch.", qj.Namespace, qj.Name)
 					return nil
-				} else if apierrors.IsConflict(err) {
+				} else if apierrors.IsConflict(retryErr) {
 					klog.Warningf("[ScheduleNext] [Agent mode] Conflict error detected when updating status in etcd for app wrapper '%s/%s, status = %+v. Retrying update.", qj.Namespace, qj.Name, qj.Status)
-				} else {
+				} else if retryErr != nil {
 					klog.Errorf("[ScheduleNext] [Agent mode] Failed to update status in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, err)
 				}
 				return retryErr
@@ -1524,9 +1493,41 @@ func (cc *XController) updateStatusInEtcd(currentAppwrapper *arbv1.AppWrapper, c
 		klog.Warningf("[updateStatusInEtcd] current app wrapper '%s/%s' called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, currentAppwrapper.ResourceVersion)
 		klog.Warningf("[updateStatusInEtcd] updated app wrapper '%s/%s' called by '%s' has version %s", updatedAppwrapper.Namespace, updatedAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion)
 	}
+	updatedAppwrapper.DeepCopyInto(currentAppwrapper)
 	klog.V(4).Infof("[updateStatusInEtcd] update success '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller)
 	return nil
 }
 
+func (cc *XController) updateStatusInEtcdWithRetry(source *arbv1.AppWrapper, caller string) error {
+	klog.V(4).Infof("[updateStatusInEtcdWithRetry] trying to update '%s/%s' version '%s' called by '%s'", source.Namespace, source.Name, source.ResourceVersion, caller)
+	source.Status.Sender = "before " + caller // set Sender string to indicate code location
+	updateStatusRetrierRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{})
+	updateStatusRetrierRetrier.SetJitter(0.05)
+	updatedAW := source.DeepCopy()
+	err := updateStatusRetrierRetrier.Run(func() error {
+		var retryErr error
+		updatedAW, retryErr = cc.arbclients.ArbV1().AppWrappers(updatedAW.Namespace).UpdateStatus(updatedAW)
+		if retryErr != nil && apierrors.IsConflict(retryErr) {
+			dest, retryErr := cc.getAppWrapper(source.Namespace, source.Name, caller)
+			if retryErr != nil && !apierrors.IsNotFound(retryErr) {
+				klog.Warningf("[updateStatusInEtcdWithRetry] retrying the update of '%s/%s' version '%s' called by '%s'", source.Namespace, source.Name, source.ResourceVersion, caller)
+				source.Status.DeepCopyInto(&dest.Status)
+				dest.Status.Sender = "before " + caller // set Sender string to indicate code location
+				dest.DeepCopyInto(updatedAW)
+			}
+			return retryErr
+		}
+		if retryErr == nil {
+			updatedAW.DeepCopyInto(source)
+		}
+		return retryErr
+	})
+	if err != nil {
+		klog.V(4).Infof("[updateStatusInEtcdWithRetry] update failure '%s/%s' called by '%s'", source.Namespace, source.Name, caller)
+		return err
+	}
+	klog.V(4).Infof("[updateStatusInEtcdWithRetry] update success '%s/%s' version '%s' called by '%s'", source.Namespace, source.Name, source.ResourceVersion, caller)
+	return nil
+}
 
 func (qjm *XController) addOrUpdateCondition(aw *arbv1.AppWrapper, condType arbv1.AppWrapperConditionType, condStatus v1.ConditionStatus,
 	condReason string, condMsg string) {
@@ -1569,7 +1570,7 @@ func (qjm *XController) backoff(q 
*arbv1.AppWrapper, reason string, message stri
 qjm.addOrUpdateCondition(apiCacheAWJob, arbv1.AppWrapperCondBackoff, v1.ConditionTrue, reason, message)
 if retryErr := qjm.updateStatusInEtcd(apiCacheAWJob, "[backoff] - Rejoining"); retryErr != nil {
 if apierrors.IsConflict(retryErr) {
- klog.Warningf("[backoff] Conflict on when upating AW status in etcd '%s/%s'. Retrying.", apiCacheAWJob.Namespace, apiCacheAWJob.Name)
+ klog.Warningf("[backoff] Conflict when updating AW status in etcd '%s/%s'. Retrying.", apiCacheAWJob.Namespace, apiCacheAWJob.Name)
 }
 return retryErr
 }
@@ -1645,7 +1646,7 @@ func (qjm *XController) UpdateQueueJobs() {
 // retrieve queueJobs from local cache. no guarantee queueJobs contain up-to-date information
 queueJobs, err := qjm.queueJobLister.AppWrappers("").List(labels.Everything())
 if err != nil {
- klog.Errorf("[UpdateQueueJobs] List of queueJobs err=%+v", err)
+ klog.Errorf("[UpdateQueueJobs] Failed to get a list of active appwrappers, err=%+v", err)
 return
 }
 for _, newjob := range queueJobs {
@@ -1662,20 +1663,22 @@ func (qjm *XController) UpdateQueueJobs() {
 LastTransitionMicroTime: metav1.NowMicro(),
 },
 }
- klog.V(3).Infof("[UpdateQueueJobs] %s 0Delay=%.6f seconds CreationTimestamp=%s ControllerFirstTimestamp=%s",
- newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob.CreationTimestamp, newjob.Status.ControllerFirstTimestamp)
+ klog.V(6).Infof("[UpdateQueueJobs] Found new appwrapper '%s/%s' 0Delay=%.6f seconds CreationTimestamp=%s ControllerFirstTimestamp=%s",
+ newjob.Namespace, newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob.CreationTimestamp, newjob.Status.ControllerFirstTimestamp)
 }
 //only set if appwrapper is running and dispatch time is not set previously
 if newjob.Status.QueueJobState == "Running" && newjob.Status.ControllerFirstDispatchTimestamp.String() == "0001-01-01 00:00:00 +0000 UTC" {
 newjob.Status.ControllerFirstDispatchTimestamp = firstTime
 }
- klog.V(10).Infof("[UpdateQueueJobs] %s: qjqueue=%t &qj=%p Version=%s Status=%+v", newjob.Name, qjm.qjqueue.IfExist(newjob), newjob, newjob.ResourceVersion, newjob.Status)
+ klog.V(6).Infof("[UpdateQueueJobs] %s: qjqueue=%t &qj=%p Version=%s Status=%+v", newjob.Name, qjm.qjqueue.IfExist(newjob), newjob, newjob.ResourceVersion, newjob.Status)
 // check eventQueue, qjqueue in program sequence to make sure job is not in qjqueue
 if _, exists, _ := qjm.eventQueue.Get(newjob); exists {
+ klog.V(6).Infof("[UpdateQueueJobs] app wrapper %s/%s found in the event queue, not adding it", newjob.Namespace, newjob.Name)
 continue
 }
 // do not enqueue if already in eventQueue
 if qjm.qjqueue.IfExist(newjob) {
+ klog.V(6).Infof("[UpdateQueueJobs] app wrapper %s/%s found in the job queue, not adding it", newjob.Namespace, newjob.Name)
 continue
 }
 // do not enqueue if already in qjqueue
@@ -1683,7 +1686,7 @@ func (qjm *XController) UpdateQueueJobs() {
 if err != nil {
 klog.Errorf("[UpdateQueueJobs] Fail to enqueue %s to eventQueue, ignore.
*Delay=%.6f seconds &qj=%p Version=%s Status=%+v err=%#v", newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob, newjob.ResourceVersion, newjob.Status, err) } else { - klog.V(10).Infof("[UpdateQueueJobs] %s *Delay=%.6f seconds eventQueue.Add_byUpdateQueueJobs &qj=%p Version=%s Status=%+v", newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob, newjob.ResourceVersion, newjob.Status) + klog.V(6).Infof("[UpdateQueueJobs] %s *Delay=%.6f seconds eventQueue.Add_byUpdateQueueJobs &qj=%p Version=%s Status=%+v", newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob, newjob.ResourceVersion, newjob.Status) } } } @@ -1713,10 +1716,10 @@ func (cc *XController) addQueueJob(obj interface{}) { qj.Name, qj.Status.State, qj.Status.ControllerFirstTimestamp, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds()) } - klog.V(10).Infof("[Informer-addQJ] %s Delay=%.6f seconds CreationTimestamp=%s ControllerFirstTimestamp=%s", + klog.V(6).Infof("[Informer-addQJ] %s Delay=%.6f seconds CreationTimestamp=%s ControllerFirstTimestamp=%s", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.CreationTimestamp, qj.Status.ControllerFirstTimestamp) - klog.V(4).Infof("[Informer-addQJ] enqueue %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) + klog.V(6).Infof("[Informer-addQJ] enqueue %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) cc.enqueue(qj) } @@ -1733,16 +1736,16 @@ func (cc *XController) updateQueueJob(oldObj, newObj interface{}) { } // AppWrappers may come out of order. Ignore old ones. if (oldQJ.Namespace == newQJ.Namespace) && (oldQJ.Name == newQJ.Name) && (larger(oldQJ.ResourceVersion, newQJ.ResourceVersion)) { - klog.V(4).Infof("[Informer-updateQJ] '%s/%s' ignored OutOfOrder arrival &oldQJ=%p oldQJ=%+v", oldQJ.Namespace, oldQJ.Name, oldQJ, oldQJ) - klog.V(4).Infof("[Informer-updateQJ] '%s/%s' ignored OutOfOrder arrival &newQJ=%p newQJ=%+v", newQJ.Namespace, newQJ.Name, newQJ, newQJ) + klog.V(6).Infof("[Informer-updateQJ] '%s/%s' ignored OutOfOrder arrival &oldQJ=%p oldQJ=%+v", oldQJ.Namespace, oldQJ.Name, oldQJ, oldQJ) + klog.V(6).Infof("[Informer-updateQJ] '%s/%s' ignored OutOfOrder arrival &newQJ=%p newQJ=%+v", newQJ.Namespace, newQJ.Name, newQJ, newQJ) return } if equality.Semantic.DeepEqual(newQJ.Status, oldQJ.Status) { - klog.Warningf("[Informer-updateQJ] No change to status field of AppWrapper: '%s/%s', oldAW=%+v, newAW=%+v.", newQJ.Namespace, newQJ.Name, oldQJ.Status, newQJ.Status) + klog.V(6).Infof("[Informer-updateQJ] No change to status field of AppWrapper: '%s/%s', oldAW=%+v, newAW=%+v.", newQJ.Namespace, newQJ.Name, oldQJ.Status, newQJ.Status) } - klog.V(4).Infof("[Informer-updateQJ] '%s/%s' *Delay=%.6f seconds normal enqueue Version=%s Status=%v", newQJ.Namespace, newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ.ResourceVersion, newQJ.Status) + klog.V(6).Infof("[Informer-updateQJ] '%s/%s' *Delay=%.6f seconds normal enqueue Version=%s Status=%v", newQJ.Namespace, newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ.ResourceVersion, newQJ.Status) cc.enqueue(newQJ) } @@ -1958,29 +1961,21 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: %s, err=%+v", qj.Name, err) return err } - klog.V(10).Infof("[syncQueueJob] AW popped from 
event queue %s &qj=%p Version=%s Status=%+v", awNew.Name, awNew, awNew.ResourceVersion, awNew.Status)
+ klog.Infof("[syncQueueJob] Pod counts updated for app wrapper '%s/%s' Version=%s Status.CanRun=%t Status.State=%s, pod counts [Pending: %d, Running: %d, Succeeded: %d, Failed %d]", awNew.Namespace, awNew.Name, awNew.ResourceVersion,
+ awNew.Status.CanRun, awNew.Status.State, awNew.Status.Pending, awNew.Status.Running, awNew.Status.Succeeded, awNew.Status.Failed)

 // Update etcd conditions if AppWrapper Job has at least 1 running pod and transitioning from dispatched to running.
 if (awNew.Status.QueueJobState != arbv1.AppWrapperCondRunning) && (awNew.Status.Running > 0) {
- syncQueueJob := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{})
- syncQueueJob.SetJitter(0.05)
- err := syncQueueJob.Run(func() error {
- awNew.Status.QueueJobState = arbv1.AppWrapperCondRunning
- cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunning, v1.ConditionTrue, "PodsRunning", "")
- awNew.Status.Conditions = append(awNew.Status.Conditions, cond)
- awNew.Status.FilterIgnore = true // Update AppWrapperCondRunning
- retryErr := cc.updateStatusInEtcd(awNew, "[syncQueueJob] Update pod counts")
- if retryErr != nil {
- if apierrors.IsConflict(retryErr) {
- klog.Warningf("[syncQueueJob] Conflict detected when updating pod status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name)
- } else {
- klog.Warningf("[syncQueueJob] Error updating pod status counts for AppWrapper job: '%s/%s', err=%+v.", qj.Namespace, qj.Name, retryErr)
- }
- return retryErr
- }
- return nil
- })
- return err
+ awNew.Status.QueueJobState = arbv1.AppWrapperCondRunning
+ cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunning, v1.ConditionTrue, "PodsRunning", "")
+ awNew.Status.Conditions = append(awNew.Status.Conditions, cond)
+ awNew.Status.FilterIgnore = true // Update AppWrapperCondRunning
+ err := cc.updateStatusInEtcdWithRetry(awNew, "[syncQueueJob] Update pod counts")
+ if err != nil {
+ klog.Errorf("[syncQueueJob] Error updating pod status counts for app wrapper job: '%s/%s', err=%+v.", qj.Namespace, qj.Name, err)
+ return err
+ }
+ return nil
 }

 //For debugging?
@@ -2003,368 +1998,320 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error {
 // pods according to what is specified in the job.Spec.
 // Does NOT modify .
func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool) error { - startTime := time.Now() - defer func() { - klog.V(10).Infof("[manageQueueJob] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status) - }() - manageQueueJobRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) - manageQueueJobRetrier.SetJitter(0.05) - err := manageQueueJobRetrier.Run(func() error { - cacheAWJob, retryErr := cc.getAppWrapper(qj.Namespace, qj.Name, "[manageQueueJob] get fresh appwrapper ") - if retryErr != nil { - // Implicit detection of deletion - if apierrors.IsNotFound(retryErr) { - klog.Warningf("[manageQueueJob] AppWrapper '%s/%s' not found in cache and will be deleted.", qj.Namespace, qj.Name) - // clean up app wrapper resources including quota - if err := cc.Cleanup(qj); err != nil { - klog.Errorf("Failed to delete resources associated with app wrapper: '%s/%s', err %v", qj.Namespace, qj.Name, err) - // return error so operation can be retried from synch queue job + + if !cc.isDispatcher { // Agent Mode + //Job is Complete only update pods if needed. + if qj.Status.State == arbv1.AppWrapperStateCompleted || qj.Status.State == arbv1.AppWrapperStateRunningHoldCompletion { + if podPhaseChanges { + // Only update etcd if AW status has changed. This can happen for periodic + // updates of pod phase counts done in caller of this function. + err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - podPhaseChanges") + if err != nil { + klog.Errorf("[manageQueueJob] Error updating status for podPhaseChanges for AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) return err } - cc.qjqueue.Delete(qj) - return nil } - klog.Errorf("[manageQueueJob] Failed to get fresh copy of appwrapper AppWrapper '%s/%s', err %v", qj.Namespace, qj.Name, retryErr) - return retryErr + return nil } - klog.V(10).Infof("[manageQueueJob] Cache AW '%s/%s' &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, qj, qj.ResourceVersion, qj.Status) - // make sure qj has the latest information - if larger(cacheAWJob.ResourceVersion, qj.ResourceVersion) { - klog.V(5).Infof("[manageQueueJob] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) - klog.V(5).Infof("[manageQueueJob] '%s/%s' found more recent copy from cache &cacheAWJob=%p cacheAWJob=%+v", cacheAWJob.Namespace, cacheAWJob.Name, cacheAWJob, cacheAWJob) - cacheAWJob.DeepCopyInto(qj) - } - if !cc.isDispatcher { // Agent Mode - - //Job is Complete only update pods if needed. - if qj.Status.State == arbv1.AppWrapperStateCompleted || qj.Status.State == arbv1.AppWrapperStateRunningHoldCompletion { - if podPhaseChanges { - // Only update etcd if AW status has changed. This can happen for periodic - // updates of pod phase counts done in caller of this function. - if retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - podPhaseChanges"); retryErr != nil { - if apierrors.IsConflict(retryErr) { - klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. 
Retrying", qj.Namespace, qj.Name) - } else { - klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) - } - return retryErr - } + // First execution of qj to set Status.State = Enqueued + if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { + // if there are running resources for this job then delete them because the job was put in + // pending state... + + // If this the first time seeing this AW, no need to delete. + stateLen := len(qj.Status.State) + if stateLen > 0 { + klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job '%s/%s' because it was preempted, status.CanRun=%t, status.State=%s", qj.Namespace, qj.Name, qj.Status.CanRun, qj.Status.State) + err00 := cc.Cleanup(qj) + if err00 != nil { + klog.Errorf("[manageQueueJob] Failed to delete resources for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err00) + return err00 } + klog.V(2).Infof("[manageQueueJob] Delete resources for AppWrapper Job '%s/%s' due to preemption was sucessfull, status.CanRun=%t, status.State=%s", qj.Namespace, qj.Name, qj.Status.CanRun, qj.Status.State) + } + + qj.Status.State = arbv1.AppWrapperStateEnqueued + // add qj to qjqueue only when it is not in UnschedulableQ + if cc.qjqueue.IfExistUnschedulableQ(qj) { + klog.V(10).Infof("[manageQueueJob] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) return nil } - // First execution of qj to set Status.State = Enqueued - if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { - // if there are running resources for this job then delete them because the job was put in - // pending state... + klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondQueueing, "AwaitingHeadOfLine") + if index < 0 { + qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") + qj.Status.Conditions = append(qj.Status.Conditions, cond) + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") + qj.Status.Conditions[index] = *cond.DeepCopy() + } + + qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext + err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - setQueueing") + if err != nil { + klog.Errorf("[manageQueueJob] Error updating status 'setQueueing' AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) + return err + } + klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + if err00 := cc.qjqueue.AddIfNotPresent(qj); err00 != nil { + klog.Errorf("manageQueueJob] Failed to add '%s/%s' to activeQueue. 
Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", + qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err00) + cc.enqueue(qj) + } else { + klog.V(3).Infof("[manageQueueJob] Added '%s/%s' to activeQueue queue 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + } + return nil + } + // Handle recovery condition + if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { + // One more check to ensure AW is not the current active scheduled object + if !cc.IsActiveAppWrapper(qj.Name, qj.Namespace) { + cc.qjqueue.AddIfNotPresent(qj) + klog.V(6).Infof("[manageQueueJob] Recovered AppWrapper '%s/%s' - added to active queue, Status=%+v", + qj.Namespace, qj.Name, qj.Status) + return nil + } + } - // If this the first time seeing this AW, no need to delete. - stateLen := len(qj.Status.State) - if stateLen > 0 { - klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job '%s/%s' because it was preempted, status=%v", qj.Namespace, qj.Name, qj.Status) - err00 := cc.Cleanup(qj) + // add qj to Etcd for dispatch + if qj.Status.CanRun && qj.Status.State != arbv1.AppWrapperStateActive && + qj.Status.State != arbv1.AppWrapperStateCompleted && + qj.Status.State != arbv1.AppWrapperStateRunningHoldCompletion { + //keep conditions until the appwrapper is re-dispatched + qj.Status.PendingPodConditions = nil + + qj.Status.State = arbv1.AppWrapperStateActive + if qj.Spec.AggrResources.Items != nil { + for i := range qj.Spec.AggrResources.Items { + err00 := cc.refManager.AddTag(&qj.Spec.AggrResources.Items[i], func() string { + return strconv.Itoa(i) + }) if err00 != nil { - klog.Errorf("[manageQueueJob] Failed to delete resources for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err00) - return err00 + klog.Warningf("Failed to add tag to aggregate resource item %s of app apprapper '%s/%s', err = %v", qj.Spec.AggrResources.Items[i].Name, qj.Namespace, qj.Name, err00) } - klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job '%s/%s' because it was be preempted was sucessfull, status=%v", qj.Namespace, qj.Name, qj.Status) - } - - qj.Status.State = arbv1.AppWrapperStateEnqueued - // add qj to qjqueue only when it is not in UnschedulableQ - if cc.qjqueue.IfExistUnschedulableQ(qj) { - klog.V(10).Infof("[manageQueueJob] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - return nil } - - klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondQueueing, "AwaitingHeadOfLine") - if index < 0 { - qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") - qj.Status.Conditions = append(qj.Status.Conditions, cond) - } else { - cond := 
GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") - qj.Status.Conditions[index] = *cond.DeepCopy() - } - - qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext - retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - setQueueing") - if retryErr != nil { - if apierrors.IsConflict(retryErr) { - klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) + } + klog.V(4).Infof("[manageQueueJob] App wrapper '%s/%s' BeforeDispatchingToEtcd Version=%s Status=%+v", qj.Namespace, qj.Name, qj.ResourceVersion, qj.Status) + dispatched := true + dispatchFailureReason := "ItemCreationFailure." + dispatchFailureMessage := "" + for _, ar := range qj.Spec.AggrResources.Items { + klog.V(10).Infof("[manageQueueJob] before dispatch [%v].SyncQueueJob %s Version=%s Status.CanRun=%t, Status.State=%s", ar.Type, qj.Name, qj.ResourceVersion, qj.Status.CanRun, qj.Status.State) + // Call Resource Controller of ar.Type to issue REST call to Etcd for resource creation + err00 := cc.qjobResControls[ar.Type].SyncQueueJob(qj, &ar) + if err00 != nil { + if apierrors.IsInvalid(err00) { + klog.Warningf("[manageQueueJob] Invalid item sent for dispatching by app wrapper='%s/%s' type=%v", qj.Namespace, qj.Name, ar.Type) } else { - klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) + klog.Errorf("[manageQueueJob] Error dispatching item for app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, ar.Type, err00) } - return retryErr - } - klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - if err00 := cc.qjqueue.AddIfNotPresent(qj); err00 != nil { - klog.Errorf("manageQueueJob] Fail to add '%s/%s' to activeQueue. 
Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", - qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err00) - cc.enqueue(qj) - } else { - klog.V(3).Infof("[manageQueueJob] '%s/%s' 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - } - return nil - } - // Handle recovery condition - if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { - // One more check to ensure AW is not the current active scheduled object - if cc.IsActiveAppWrapper(qj.Name, qj.Namespace) { - cc.qjqueue.AddIfNotPresent(qj) - klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper '%s/%s' - added to active queue, Status=%+v", - qj.Namespace, qj.Name, qj.Status) - return nil + dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) + dispatched = false + break } } - - // add qj to Etcd for dispatch - if qj.Status.CanRun && qj.Status.State != arbv1.AppWrapperStateActive && - qj.Status.State != arbv1.AppWrapperStateCompleted && - qj.Status.State != arbv1.AppWrapperStateRunningHoldCompletion { - //keep conditions until the appwrapper is re-dispatched - qj.Status.PendingPodConditions = nil - - qj.Status.State = arbv1.AppWrapperStateActive - if qj.Spec.AggrResources.Items != nil { - for i := range qj.Spec.AggrResources.Items { - err00 := cc.refManager.AddTag(&qj.Spec.AggrResources.Items[i], func() string { - return strconv.Itoa(i) - }) - if err00 != nil { - klog.Warningf("Failed to add tag to aggregate resource item %s of app apprapper '%s/%s', err = %v", qj.Spec.AggrResources.Items[i].Name, qj.Namespace, qj.Name, err00) - } - } - } - klog.V(4).Infof("[manageQueueJob] %s 3Delay=%.6f seconds BeforeDispatchingToEtcd Version=%s Status=%+v", - qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.ResourceVersion, qj.Status) - dispatched := true - dispatchFailureReason := "ItemCreationFailure." 
- dispatchFailureMessage := "" - for _, ar := range qj.Spec.AggrResources.Items { - klog.V(10).Infof("[manageQueueJob] before dispatch [%v].SyncQueueJob %s &qj=%p Version=%s Status=%+v", ar.Type, qj.Name, qj, qj.ResourceVersion, qj.Status) - // Call Resource Controller of ar.Type to issue REST call to Etcd for resource creation - err00 := cc.qjobResControls[ar.Type].SyncQueueJob(qj, &ar) + if dispatched { + // Handle generic resources + for _, ar := range qj.Spec.AggrResources.GenericItems { + klog.V(10).Infof("[manageQueueJob] before dispatch Generic.SyncQueueJob %s Version=%sStatus.CanRun=%t, Status.State=%s", qj.Name, qj.ResourceVersion, qj.Status.CanRun, qj.Status.State) + _, err00 := cc.genericresources.SyncQueueJob(qj, &ar) if err00 != nil { if apierrors.IsInvalid(err00) { - klog.Warningf("[manageQueueJob] Invalid item sent for dispatching by app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, ar.Type, err00) + klog.Warningf("[manageQueueJob] Invalid generic item sent for dispatching by app wrapper='%s/%s' err=%v", qj.Namespace, qj.Name, err00) } else { - klog.Errorf("[manageQueueJob] Error dispatching item for app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, ar.Type, err00) + klog.Errorf("[manageQueueJob] Error dispatching generic item for app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, err00) } dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) + klog.Errorf("[manageQueueJob] Error dispatching job=%s Status=%+v err=%+v", qj.Name, qj.Status, err00) dispatched = false - break } } - if dispatched { - // Handle generic resources - for _, ar := range qj.Spec.AggrResources.GenericItems { - klog.V(10).Infof("[manageQueueJob] before dispatch Generic.SyncQueueJob %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) - _, err00 := cc.genericresources.SyncQueueJob(qj, &ar) - if err00 != nil { - if apierrors.IsInvalid(err00) { - klog.Warningf("[manageQueueJob] Invalid generic item sent for dispatching by app wrapper='%s/%s' err=%v", qj.Namespace, qj.Name, err00) - } else { - klog.Errorf("[manageQueueJob] Error dispatching generic item for app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, err00) - } - dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) - klog.Errorf("[manageQueueJob] Error dispatching job=%s Status=%+v err=%+v", qj.Name, qj.Status, err00) - dispatched = false - } - } + } + + if dispatched { // set AppWrapperCondRunning if all resources are successfully dispatched + qj.Status.QueueJobState = arbv1.AppWrapperCondDispatched + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondDispatched, "AppWrapperRunnable") + if index < 0 { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondDispatched, v1.ConditionTrue, "AppWrapperRunnable", "") + qj.Status.Conditions = append(qj.Status.Conditions, cond) + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondDispatched, v1.ConditionTrue, "AppWrapperRunnable", "") + qj.Status.Conditions[index] = *cond.DeepCopy() } - if dispatched { // set AppWrapperCondRunning if all resources are successfully dispatched - qj.Status.QueueJobState = arbv1.AppWrapperCondDispatched - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondDispatched, "AppWrapperRunnable") - if index < 0 { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondDispatched, v1.ConditionTrue, "AppWrapperRunnable", "") - qj.Status.Conditions = append(qj.Status.Conditions, cond) - } else { - cond 
:= GenerateAppWrapperCondition(arbv1.AppWrapperCondDispatched, v1.ConditionTrue, "AppWrapperRunnable", "") - qj.Status.Conditions[index] = *cond.DeepCopy() - } + klog.V(4).Infof("[manageQueueJob] App wrapper '%s/%s' after DispatchingToEtcd Version=%s Status=%+v", qj.Namespace, qj.Name, qj.ResourceVersion, qj.Status) - klog.V(3).Infof("[manageQueueJob] %s 4Delay=%.6f seconds AllResourceDispatchedToEtcd Version=%s Status=%+v", - qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.ResourceVersion, qj.Status) + } else { + klog.V(4).Infof("[manageQueueJob] App wrapper '%s/%s' failed dispatching Version=%s Status=%+v", qj.Namespace, qj.Name, qj.ResourceVersion, qj.Status) - } else { - qj.Status.State = arbv1.AppWrapperStateFailed - qj.Status.QueueJobState = arbv1.AppWrapperCondFailed - if !isLastConditionDuplicate(qj, arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) - qj.Status.Conditions = append(qj.Status.Conditions, cond) - } - // clean up app wrapper resources including quota - if err00 := cc.Cleanup(qj); err00 != nil { - klog.Errorf("Failed to delete resources associated with app wrapper: '%s/%s', err %v", qj.Namespace, qj.Name, err00) - // return error so operation can be retried - return err00 - } - cc.qjqueue.Delete(qj) + qj.Status.State = arbv1.AppWrapperStateFailed + qj.Status.QueueJobState = arbv1.AppWrapperCondFailed + if !isLastConditionDuplicate(qj, arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) + qj.Status.Conditions = append(qj.Status.Conditions, cond) } - - qj.Status.FilterIgnore = true // update State & QueueJobState after dispatch - retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - afterEtcdDispatching") - if retryErr != nil { - if apierrors.IsConflict(retryErr) { - klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. 
Retrying", qj.Namespace, qj.Name) - } else { - klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) - } - return retryErr + // clean up app wrapper resources including quota + if err00 := cc.Cleanup(qj); err00 != nil { + klog.Errorf("Failed to delete resources associated with app wrapper: '%s/%s', err %v", qj.Namespace, qj.Name, err00) + // return error so operation can be retried + return err00 } + cc.qjqueue.Delete(qj) + } - } else if qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateActive { - //set appwrapper status to Complete or RunningHoldCompletion - derivedAwStatus := cc.getAppWrapperCompletionStatus(qj) - - //Set Appwrapper state to complete if all items in Appwrapper - //are completed - if derivedAwStatus == arbv1.AppWrapperStateRunningHoldCompletion { - qj.Status.State = derivedAwStatus - var updateQj *arbv1.AppWrapper - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondRunningHoldCompletion, "SomeItemsCompleted") - if index < 0 { - qj.Status.QueueJobState = arbv1.AppWrapperCondRunningHoldCompletion - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunningHoldCompletion, v1.ConditionTrue, "SomeItemsCompleted", "") - qj.Status.Conditions = append(qj.Status.Conditions, cond) - qj.Status.FilterIgnore = true // Update AppWrapperCondRunningHoldCompletion - updateQj = qj.DeepCopy() - } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunningHoldCompletion, v1.ConditionTrue, "SomeItemsCompleted", "") - qj.Status.Conditions[index] = *cond.DeepCopy() - updateQj = qj.DeepCopy() - } - if retryErr := cc.updateStatusInEtcd(updateQj, "[syncQueueJob] setRunningHoldCompletion"); retryErr != nil { - if apierrors.IsConflict(retryErr) { - klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. 
Retrying", qj.Namespace, qj.Name) - } else { - klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) - } - return retryErr - } + qj.Status.FilterIgnore = true // update State & QueueJobState after dispatch + err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - afterEtcdDispatching") + if err != nil { + klog.Errorf("[manageQueueJob] Error updating status 'afterEtcdDispatching' for AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) + return err + } + return nil + } else if qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateActive { + klog.Infof("[manageQueueJob] Getting completion status for app wrapper '%s/%s' Version=%s Status.CanRun=%t Status.State=%s, pod counts [Pending: %d, Running: %d, Succeded: %d, Failed %d]", qj.Namespace, qj.Name, qj.ResourceVersion, + qj.Status.CanRun, qj.Status.State, qj.Status.Pending, qj.Status.Running, qj.Status.Succeeded, qj.Status.Failed) + + //set appwrapper status to Complete or RunningHoldCompletion + derivedAwStatus := cc.getAppWrapperCompletionStatus(qj) + + klog.Infof("[manageQueueJob] Got completion status '%s' for app wrapper '%s/%s' Version=%s Status.CanRun=%t Status.State=%s, pod counts [Pending: %d, Running: %d, Succeded: %d, Failed %d]", derivedAwStatus, qj.Namespace, qj.Name, qj.ResourceVersion, + qj.Status.CanRun, qj.Status.State, qj.Status.Pending, qj.Status.Running, qj.Status.Succeeded, qj.Status.Failed) + + //Set Appwrapper state to complete if all items in Appwrapper + //are completed + if derivedAwStatus == arbv1.AppWrapperStateRunningHoldCompletion { + qj.Status.State = derivedAwStatus + var updateQj *arbv1.AppWrapper + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondRunningHoldCompletion, "SomeItemsCompleted") + if index < 0 { + qj.Status.QueueJobState = arbv1.AppWrapperCondRunningHoldCompletion + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunningHoldCompletion, v1.ConditionTrue, "SomeItemsCompleted", "") + qj.Status.Conditions = append(qj.Status.Conditions, cond) + qj.Status.FilterIgnore = true // Update AppWrapperCondRunningHoldCompletion + updateQj = qj.DeepCopy() + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunningHoldCompletion, v1.ConditionTrue, "SomeItemsCompleted", "") + qj.Status.Conditions[index] = *cond.DeepCopy() + updateQj = qj.DeepCopy() } - //Set appwrapper status to complete - if derivedAwStatus == arbv1.AppWrapperStateCompleted { - qj.Status.State = derivedAwStatus - qj.Status.CanRun = false - var updateQj *arbv1.AppWrapper - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondCompleted, "PodsCompleted") - if index < 0 { - qj.Status.QueueJobState = arbv1.AppWrapperCondCompleted - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") - qj.Status.Conditions = append(qj.Status.Conditions, cond) - qj.Status.FilterIgnore = true // Update AppWrapperCondCompleted - updateQj = qj.DeepCopy() - } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") - qj.Status.Conditions[index] = *cond.DeepCopy() - updateQj = qj.DeepCopy() - } - if retryErr := cc.updateStatusInEtcd(updateQj, "[syncQueueJob] setCompleted"); retryErr != nil { - if cc.quotaManager != nil { - cc.quotaManager.Release(updateQj) - } - if apierrors.IsConflict(retryErr) { - klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: 
'%s/%s'. Retrying", qj.Namespace, qj.Name) - } else { - klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) - } - return retryErr - } + err := cc.updateStatusInEtcdWithRetry(updateQj, "[manageQueueJob] setRunningHoldCompletion") + if err != nil { + klog.Errorf("[manageQueueJob] Error updating status 'setRunningHoldCompletion' for AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) + return err + } + } + //Set appwrapper status to complete + if derivedAwStatus == arbv1.AppWrapperStateCompleted { + qj.Status.State = derivedAwStatus + qj.Status.CanRun = false + var updateQj *arbv1.AppWrapper + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondCompleted, "PodsCompleted") + if index < 0 { + qj.Status.QueueJobState = arbv1.AppWrapperCondCompleted + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") + qj.Status.Conditions = append(qj.Status.Conditions, cond) + qj.Status.FilterIgnore = true // Update AppWrapperCondCompleted + updateQj = qj.DeepCopy() + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondCompleted, v1.ConditionTrue, "PodsCompleted", "") + qj.Status.Conditions[index] = *cond.DeepCopy() + updateQj = qj.DeepCopy() + } + err := cc.updateStatusInEtcdWithRetry(updateQj, "[manageQueueJob] setCompleted") + if err != nil { if cc.quotaManager != nil { cc.quotaManager.Release(updateQj) } + klog.Errorf("[manageQueueJob] Error updating status 'setCompleted' AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) + return err } - } else if podPhaseChanges { // Continued bug fix - // Only update etcd if AW status has changed. This can happen for periodic - // updates of pod phase counts done in caller of this function. - if retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - podPhaseChanges"); retryErr != nil { - if apierrors.IsConflict(retryErr) { - klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) - } else { - klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) - } - return retryErr + if cc.quotaManager != nil { + cc.quotaManager.Release(updateQj) } } - return nil - } else { // Dispatcher Mode + klog.Infof("[manageQueueJob] Done getting completion status for app wrapper '%s/%s' Version=%s Status.CanRun=%t Status.State=%s, pod counts [Pending: %d, Running: %d, Succeded: %d, Failed %d]", qj.Namespace, qj.Name, qj.ResourceVersion, + qj.Status.CanRun, qj.Status.State, qj.Status.Pending, qj.Status.Running, qj.Status.Succeeded, qj.Status.Failed) - if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { - // if there are running resources for this job then delete them because the job was put in - // pending state... - klog.V(3).Infof("[manageQueueJob] [Dispatcher] Deleting AppWrapper resources because it will be preempted! 
%s", qj.Name) - err00 := cc.Cleanup(qj) - if err00 != nil { - klog.Errorf("Failed to clean up resources for app wrapper '%s/%s', err =%v", qj.Namespace, qj.Name, err00) - return err00 - } - - qj.Status.State = arbv1.AppWrapperStateEnqueued - if cc.qjqueue.IfExistUnschedulableQ(qj) { - klog.V(10).Infof("[manageQueueJob] [Dispatcher] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - } else { - klog.V(10).Infof("[manageQueueJob] [Dispatcher] before add to activeQ '%s/%s' activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing - qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext - if retryErr := cc.updateStatusInEtcd(qj, "manageQueueJob - setQueueing"); retryErr != nil { - if apierrors.IsConflict(retryErr) { - klog.Warningf("[manageQueueJob] Conflict detected when updating status counts for AppWrapper: '%s/%s'. Retrying", qj.Namespace, qj.Name) - } else { - klog.Errorf("[manageQueueJob] Error updating status counts AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, retryErr) - } - return retryErr - } - if err00 = cc.qjqueue.AddIfNotPresent(qj); err00 != nil { - klog.Errorf("[manageQueueJob] [Dispatcher] Fail to add '%s/%s' to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", - qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err00) - cc.enqueue(qj) - } else { - klog.V(4).Infof("[manageQueueJob] [Dispatcher] '%s/%s' 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - } - } - return nil + } else if podPhaseChanges { // Continued bug fix + // Only update etcd if AW status has changed. This can happen for periodic + // updates of pod phase counts done in caller of this function. + err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - podPhaseChanges") + if err != nil { + klog.Errorf("[manageQueueJob] Error updating status 'podPhaseChanges' AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) + return err } - if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued { - cc.qjqueue.AddIfNotPresent(qj) - return nil + } + return nil + } else { // Dispatcher Mode + + if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { + // if there are running resources for this job then delete them because the job was put in + // pending state... + klog.V(3).Infof("[manageQueueJob] [Dispatcher] Deleting AppWrapper resources because it will be preempted! 
%s", qj.Name) + err00 := cc.Cleanup(qj) + if err00 != nil { + klog.Errorf("Failed to clean up resources for app wrapper '%s/%s', err =%v", qj.Namespace, qj.Name, err00) + return err00 } - if qj.Status.CanRun && !qj.Status.IsDispatched { - if klog.V(10).Enabled() { - current_time := time.Now() - klog.V(10).Infof("[manageQueueJob] [Dispatcher] XQJ '%s/%s' has Overhead Before Dispatching: %s", qj.Namespace, qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) - klog.V(10).Infof("[manageQueueJob] [Dispatcher] '%s/%s', %s: WorkerBeforeDispatch", qj.Namespace, qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) - } - queuejobKey, _ := GetQueueJobKey(qj) - if agentId, ok := cc.dispatchMap[queuejobKey]; ok { - klog.V(10).Infof("[manageQueueJob] [Dispatcher] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId) - cc.agentMap[agentId].CreateJob(qj) - qj.Status.IsDispatched = true - } else { - klog.Errorf("[manageQueueJob] [Dispatcher] AppWrapper %s not found in dispatcher mapping.", qj.Name) - } - if klog.V(10).Enabled() { - current_time := time.Now() - klog.V(10).Infof("[manageQueueJob] [Dispatcher] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) - klog.V(10).Infof("[manageQueueJob] [Dispatcher] %s, %s: WorkerAfterDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + qj.Status.State = arbv1.AppWrapperStateEnqueued + if cc.qjqueue.IfExistUnschedulableQ(qj) { + klog.V(10).Infof("[manageQueueJob] [Dispatcher] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + } else { + klog.V(10).Infof("[manageQueueJob] [Dispatcher] before add to activeQ '%s/%s' activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing + qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext + err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - setQueueing") + if err != nil { + klog.Errorf("[manageQueueJob] Error updating status 'setQueueing' for AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) + return err } - if retryErr := cc.updateStatusInEtcd(qj, "[manageQueueJob] [Dispatcher] -- set dispatched true"); retryErr != nil { - klog.Errorf("Failed to update status of AppWrapper %s/%s: err=%v", - qj.Namespace, qj.Name, retryErr) - return retryErr + if err00 = cc.qjqueue.AddIfNotPresent(qj); err00 != nil { + klog.Errorf("[manageQueueJob] [Dispatcher] Fail to add '%s/%s' to activeQueue. 
Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", + qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err00) + cc.enqueue(qj) + } else { + klog.V(4).Infof("[manageQueueJob] [Dispatcher] '%s/%s' 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } } return nil } - }) - return err + if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued { + cc.qjqueue.AddIfNotPresent(qj) + return nil + } + if qj.Status.CanRun && !qj.Status.IsDispatched { + if klog.V(10).Enabled() { + current_time := time.Now() + klog.V(10).Infof("[manageQueueJob] [Dispatcher] XQJ '%s/%s' has Overhead Before Dispatching: %s", qj.Namespace, qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[manageQueueJob] [Dispatcher] '%s/%s', %s: WorkerBeforeDispatch", qj.Namespace, qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + } + + queuejobKey, _ := GetQueueJobKey(qj) + if agentId, ok := cc.dispatchMap[queuejobKey]; ok { + klog.V(10).Infof("[manageQueueJob] [Dispatcher] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId) + cc.agentMap[agentId].CreateJob(qj) + qj.Status.IsDispatched = true + } else { + klog.Errorf("[manageQueueJob] [Dispatcher] AppWrapper %s not found in dispatcher mapping.", qj.Name) + } + if klog.V(10).Enabled() { + current_time := time.Now() + klog.V(10).Infof("[manageQueueJob] [Dispatcher] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[manageQueueJob] [Dispatcher] %s, %s: WorkerAfterDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + } + err := cc.updateStatusInEtcdWithRetry(qj, "[manageQueueJob] [Dispatcher] -- set dispatched true") + if err != nil { + klog.Errorf("Failed to update status of AppWrapper %s/%s: err=%v", qj.Namespace, qj.Name, err) + return err + } + } + return nil + } } // Cleanup function @@ -2382,7 +2329,7 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { err = multierror.Append(err, err00) continue } - klog.V(3).Infof("[Cleanup] Deleted item from app wrapper='%s/%s'", + klog.V(3).Infof("[Cleanup] Deleted item from app wrapper='%s/%s'", appwrapper.Namespace, appwrapper.Name) } } @@ -2395,7 +2342,7 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { err = multierror.Append(err, err00) continue } - klog.V(3).Infof("[Cleanup] Deleted generic item %s, GVK=%s.%s.%s from app wrapper='%s/%s", + klog.V(3).Infof("[Cleanup] Deleted generic item %s, GVK=%s.%s.%s from app wrapper='%s/%s'", genericResourceName, gvk.Group, gvk.Version, gvk.Kind, appwrapper.Namespace, appwrapper.Name) } } @@ -2468,7 +2415,12 @@ func (c *EtcdErrorClassifier) Classify(err error) retrier.Action { func (cc *XController) IsActiveAppWrapper(name, namespace string) bool { cc.schedulingMutex.RLock() defer cc.schedulingMutex.RUnlock() - return cc.schedulingAW == nil || + if cc.schedulingAW == nil { + klog.V(6).Info("[IsActiveAppWrapper] No active scheduling app wrapper set") + } else { + klog.V(6).Infof("[IsActiveAppWrapper] Active scheduling app wrapper is : '%s/%s'", cc.schedulingAW.Namespace, cc.schedulingAW.Name) + } + return cc.schedulingAW != nil && (strings.Compare(cc.schedulingAW.Namespace, namespace) != 0 && 
strings.Compare(cc.schedulingAW.Name, name) != 0) } diff --git a/test/e2e/queue.go b/test/e2e/queue.go index 1b9b07dd5..1a3f56223 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -366,7 +366,7 @@ var _ = Describe("AppWrapper E2E Test", func() { }) - It("Create AppWrapper - Namespace Only - 0 Pods", func() { + PIt("Create AppWrapper - Namespace Only - 0 Pods", func() { fmt.Fprintf(os.Stdout, "[e2e] Create AppWrapper - Namespace Only - 0 Pods - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper @@ -459,7 +459,7 @@ var _ = Describe("AppWrapper E2E Test", func() { // Make sure pods from AW aw-deployment-1-850-cpu above do not exist proving preemption err = waitAWAnyPodsExists(context, aw2) - Expect(err).To(HaveOccurred(), "Expecting no pods for app wrapper : aw-deployment-1-800-cpu") + Expect(err).To(HaveOccurred(), "Expecting no pods for app wrapper : aw-deployment-1-850-cpu") }) It("MCAD Bad Custom Pod Resources vs. Deployment Pod Resource Not Queuing Test", func() { @@ -668,16 +668,16 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappersPtr := &appwrappers defer cleanupTestObjectsPtr(context, appwrappersPtr) - aw := createGenericDeploymentAWWithMultipleItems(context, appendRandomString("aw-deployment-2-status")) + aw := createGenericDeploymentAWWithMultipleItems(context, "aw-deployment-rhc") appwrappers = append(appwrappers, aw) - time.Sleep(1 * time.Minute) + time.Sleep(30 * time.Second) err1 := waitAWPodsReady(context, aw) - Expect(err1).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-2-status") + Expect(err1).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-rhc") aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) Expect(err).NotTo(HaveOccurred(), "Expecting to get app wrapper status") fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateRunningHoldCompletion)) - fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RuningHoldCompletion Test - Completed.\n") + fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RuningHoldCompletion Test - Completed. Awaiting app wrapper cleanup.\n") }) It("MCAD Service no RunningHoldCompletion or Complete Test", func() { From 4ce6bfe365e7b216087b7717379c2fb6506cc1cc Mon Sep 17 00:00:00 2001 From: Laurentiu Bradin <109964136+z103cb@users.noreply.github.com> Date: Tue, 11 Jul 2023 13:39:48 +0300 Subject: [PATCH 20/23] Fixed Merges issues Fixed Failing test --- test/e2e/queue.go | 17 +++++++++++------ test/e2e/util.go | 4 ++-- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/test/e2e/queue.go b/test/e2e/queue.go index 2f17a7fee..476a195cb 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -97,7 +97,7 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Ready pods are expected for app wrapper: aw-deployment-2-550cpu") // This should fill up the master node aw2 := createDeploymentAWwith350CPU(context, appendRandomString("aw-deployment-2-350cpu")) @@ -105,8 +105,8 @@ var _ = Describe("AppWrapper E2E Test", func() { // Using quite mode due to creating of pods in earlier step. 
err = waitAWReadyQuiet(context, aw2) - fmt.Fprintf(os.Stdout, "The error is %v", err) - Expect(err).NotTo(HaveOccurred()) + Expect(err).NotTo(HaveOccurred(), "Ready pods are expected for app wrapper: aw-deployment-2-350cpu") + fmt.Fprintf(os.Stdout, "[e2e] MCAD CPU Accounting Test - Completed. Awaiting app wrapper cleanup...\n") }) It("MCAD CPU Preemption Test", func() { @@ -428,7 +428,6 @@ var _ = Describe("AppWrapper E2E Test", func() { // This should fill up the worker node and most of the master node aw := createDeploymentAWwith550CPU(context, appendRandomString("aw-deployment-2-550cpu")) appwrappers = append(appwrappers, aw) - time.Sleep(1 * time.Minute) err := waitAWPodsReady(context, aw) Expect(err).NotTo(HaveOccurred(), "Expecting pods for app wrapper: aw-deployment-2-550cpu") @@ -443,6 +442,7 @@ var _ = Describe("AppWrapper E2E Test", func() { err = waitAWPodsPending(context, aw2) Expect(err).NotTo(HaveOccurred(), "Expecting pending pods (try 2) for app wrapper: aw-ff-deployment-1-850-cpu") + fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Pending pods found for app wrapper aw-ff-deployment-1-850-cpu\n") // This should fit on cluster after AW aw-deployment-1-850-cpu above is automatically preempted on // scheduling failure @@ -451,17 +451,22 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw3) - // Wait for pods to get created, assumes preemption around 12 minutes - err = waitAWPodsExists(context, aw3, 12*time.Minute) + // Wait for pods to get created, assumes preemption around 3 minutes + err = waitAWPodsExists(context, aw3, 3*time.Minute) Expect(err).NotTo(HaveOccurred(), "Expecting pods for app wrapper: aw-ff-deployment-2-340-cpu") + fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Pods not found for app wrapper aw-ff-deployment-2-340-cpu\n") // Make sure they are running err = waitAWPodsReady(context, aw3) Expect(err).NotTo(HaveOccurred(), "Expecting ready pods for app wrapper: aw-ff-deployment-2-340-cpu") + fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Ready pods found for app wrapper aw-ff-deployment-2-340-cpu\n") // Make sure pods from AW aw-deployment-1-850-cpu above do not exist proving preemption + time.Sleep(1 * time.Minute) err = waitAWAnyPodsExists(context, aw2) Expect(err).To(HaveOccurred(), "Expecting no pods for app wrapper : aw-deployment-1-850-cpu") + fmt.Fprintf(os.Stdout, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Completed. Awaiting app wrapper cleanup\n") + }) It("MCAD Bad Custom Pod Resources vs. 
Deployment Pod Resource Not Queuing Test", func() { diff --git a/test/e2e/util.go b/test/e2e/util.go index 30769b9a4..9c85d6d8c 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -696,7 +696,7 @@ func awNamespacePhase(ctx *context, aw *arbv1.AppWrapper, phase []v1.NamespacePh } func waitAWPodsReady(ctx *context, aw *arbv1.AppWrapper) error { - return waitAWPodsReadyEx(ctx, aw, ninetySeconds, int(aw.Spec.SchedSpec.MinAvailable), false) + return waitAWPodsReadyEx(ctx, aw, threeHundredSeconds, int(aw.Spec.SchedSpec.MinAvailable), false) } func waitAWPodsCompleted(ctx *context, aw *arbv1.AppWrapper, timeout time.Duration) error { @@ -708,7 +708,7 @@ func waitAWPodsNotCompleted(ctx *context, aw *arbv1.AppWrapper) error { } func waitAWReadyQuiet(ctx *context, aw *arbv1.AppWrapper) error { - return waitAWPodsReadyEx(ctx, aw, ninetySeconds, int(aw.Spec.SchedSpec.MinAvailable), true) + return waitAWPodsReadyEx(ctx, aw, threeHundredSeconds, int(aw.Spec.SchedSpec.MinAvailable), true) } func waitAWAnyPodsExists(ctx *context, aw *arbv1.AppWrapper) error { From 890dade9ca3a0b3ac2ff65d02f0e908ca82b1012 Mon Sep 17 00:00:00 2001 From: Abhishek Malvankar Date: Tue, 11 Jul 2023 17:00:16 -0400 Subject: [PATCH 21/23] fix test --- test/e2e/queue.go | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/test/e2e/queue.go b/test/e2e/queue.go index 476a195cb..a911e75eb 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -417,7 +417,7 @@ var _ = Describe("AppWrapper E2E Test", func() { Expect(err).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-2-550-vs-550-cpu") }) - It("MCAD Scheduling Fail Fast Preemption Test", func() { + FIt("MCAD Scheduling Fail Fast Preemption Test", func() { fmt.Fprintf(os.Stdout, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Started.\n") context := initTestContext() @@ -428,6 +428,7 @@ var _ = Describe("AppWrapper E2E Test", func() { // This should fill up the worker node and most of the master node aw := createDeploymentAWwith550CPU(context, appendRandomString("aw-deployment-2-550cpu")) appwrappers = append(appwrappers, aw) + time.Sleep(1 * time.Minute) err := waitAWPodsReady(context, aw) Expect(err).NotTo(HaveOccurred(), "Expecting pods for app wrapper: aw-deployment-2-550cpu") @@ -451,20 +452,20 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw3) - // Wait for pods to get created, assumes preemption around 3 minutes - err = waitAWPodsExists(context, aw3, 3*time.Minute) + // Wait for pods to get created, assumes preemption around 10 minutes + err = waitAWPodsExists(context, aw3, 720000*time.Millisecond) Expect(err).NotTo(HaveOccurred(), "Expecting pods for app wrapper: aw-ff-deployment-2-340-cpu") fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Pods not found for app wrapper aw-ff-deployment-2-340-cpu\n") - // Make sure they are running + // Make sure aw2 pods do not exist err = waitAWPodsReady(context, aw3) - Expect(err).NotTo(HaveOccurred(), "Expecting ready pods for app wrapper: aw-ff-deployment-2-340-cpu") + Expect(err).NotTo(HaveOccurred(), "Expecting no pods for app wrapper: aw-ff-deployment-2-340-cpu") fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Ready pods found for app wrapper aw-ff-deployment-2-340-cpu\n") // Make sure pods from AW aw-deployment-1-850-cpu above do not exist proving preemption - time.Sleep(1 * time.Minute) + time.Sleep(5 * time.Minute) err = waitAWAnyPodsExists(context, aw2) - 
Expect(err).To(HaveOccurred(), "Expecting no pods for app wrapper : aw-deployment-1-850-cpu") + Expect(err).To(HaveOccurred(), "Expecting no pods for app wrapper : aw-ff-deployment-1-850-cpu") fmt.Fprintf(os.Stdout, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Completed. Awaiting app wrapper cleanup\n") }) From 6b0461ad0556fc8707f873ebf1e173dfc2a7510b Mon Sep 17 00:00:00 2001 From: Abhishek Malvankar Date: Tue, 11 Jul 2023 17:32:08 -0400 Subject: [PATCH 22/23] remove Fit --- test/e2e/queue.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/queue.go b/test/e2e/queue.go index a911e75eb..6b02af1bd 100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -417,7 +417,7 @@ var _ = Describe("AppWrapper E2E Test", func() { Expect(err).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-2-550-vs-550-cpu") }) - FIt("MCAD Scheduling Fail Fast Preemption Test", func() { + It("MCAD Scheduling Fail Fast Preemption Test", func() { fmt.Fprintf(os.Stdout, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Started.\n") context := initTestContext() From 97f8285a8e1ded26e5d85541cc423d97d5f68661 Mon Sep 17 00:00:00 2001 From: Abhishek Malvankar Date: Tue, 11 Jul 2023 19:26:23 -0400 Subject: [PATCH 23/23] bump build time --- hack/run-e2e-kind.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/run-e2e-kind.sh b/hack/run-e2e-kind.sh index 35646070c..ca8ac10fa 100755 --- a/hack/run-e2e-kind.sh +++ b/hack/run-e2e-kind.sh @@ -394,4 +394,4 @@ setup-mcad-env kuttl-tests mcad-quota-management-down mcad-up -go test ./test/e2e -v -timeout 120m -count=1 +go test ./test/e2e -v -timeout 130m -count=1