diff --git a/config/crd/bases/mcad.ibm.com_appwrappers.yaml b/config/crd/bases/mcad.ibm.com_appwrappers.yaml index 3280f8a14..0f7598754 100644 --- a/config/crd/bases/mcad.ibm.com_appwrappers.yaml +++ b/config/crd/bases/mcad.ibm.com_appwrappers.yaml @@ -1,3 +1,5 @@ + +--- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: @@ -776,10 +778,6 @@ spec: QueueJob (by Informer) format: date-time type: string - controllerfirstdispatchtimestamp: - description: Microsecond level timestamp when controller first dispatches appwrapper - format: date-time - type: string failed: description: The number of resources which reached phase Failed. format: int32 @@ -792,7 +790,8 @@ spec: description: Is Dispatched? type: boolean local: - description: Indicate if message is a duplicate (for Informer to recognize duplicate messages) + description: Indicate if message is a duplicate (for Informer to recognize + duplicate messages) type: boolean message: type: string @@ -813,13 +812,15 @@ spec: format: int32 type: integer queuejobstate: - description: State of QueueJob - Init, Queueing, HeadOfLine, Rejoining ... + description: State of QueueJob - Init, Queueing, HeadOfLine, Rejoining, + ... type: string running: format: int32 type: integer sender: - description: Indicate sender of this message (extremely useful for debugging) + description: Indicate sender of this message (extremely useful for + debugging) type: string state: description: State - Pending, Running, Failed, Deleted @@ -833,22 +834,9 @@ spec: (is this different from the MinAvailable from JobStatus) format: int32 type: integer - numberOfRequeueings: - description: Field to keep track of how many times a requeuing event has been triggered - format: int32 - type: integer - default: 0 - requeueingTimeInSeconds: - description: Field to keep track of total number of seconds spent in requeueing - format: int32 - type: integer - default: 0 type: object required: - spec type: object served: true storage: true - subresources: - status: {} - diff --git a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml index 3280f8a14..0f7598754 100644 --- a/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml +++ b/deployment/mcad-controller/crds/mcad.ibm.com_appwrappers.yaml @@ -1,3 +1,5 @@ + +--- apiVersion: apiextensions.k8s.io/v1 kind: CustomResourceDefinition metadata: @@ -776,10 +778,6 @@ spec: QueueJob (by Informer) format: date-time type: string - controllerfirstdispatchtimestamp: - description: Microsecond level timestamp when controller first dispatches appwrapper - format: date-time - type: string failed: description: The number of resources which reached phase Failed. format: int32 @@ -792,7 +790,8 @@ spec: description: Is Dispatched? type: boolean local: - description: Indicate if message is a duplicate (for Informer to recognize duplicate messages) + description: Indicate if message is a duplicate (for Informer to recognize + duplicate messages) type: boolean message: type: string @@ -813,13 +812,15 @@ spec: format: int32 type: integer queuejobstate: - description: State of QueueJob - Init, Queueing, HeadOfLine, Rejoining ... + description: State of QueueJob - Init, Queueing, HeadOfLine, Rejoining, + ... 
type: string running: format: int32 type: integer sender: - description: Indicate sender of this message (extremely useful for debugging) + description: Indicate sender of this message (extremely useful for + debugging) type: string state: description: State - Pending, Running, Failed, Deleted @@ -833,22 +834,9 @@ spec: (is this different from the MinAvailable from JobStatus) format: int32 type: integer - numberOfRequeueings: - description: Field to keep track of how many times a requeuing event has been triggered - format: int32 - type: integer - default: 0 - requeueingTimeInSeconds: - description: Field to keep track of total number of seconds spent in requeueing - format: int32 - type: integer - default: 0 type: object required: - spec type: object served: true storage: true - subresources: - status: {} - diff --git a/go.mod b/go.mod index 907cdc8b3..6a19ad0b1 100644 --- a/go.mod +++ b/go.mod @@ -5,7 +5,6 @@ go 1.18 require ( github.com/eapache/go-resiliency v1.3.0 github.com/emicklei/go-restful v2.16.0+incompatible - github.com/gogo/protobuf v1.3.1 github.com/golang/protobuf v1.4.3 github.com/hashicorp/go-multierror v1.1.1 github.com/kubernetes-sigs/custom-metrics-apiserver v0.0.0-20210311094424-0ca2b1909cdc @@ -46,6 +45,7 @@ require ( github.com/go-openapi/jsonreference v0.19.5 // indirect github.com/go-openapi/spec v0.20.0 // indirect github.com/go-openapi/swag v0.19.12 // indirect + github.com/gogo/protobuf v1.3.1 // indirect github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect github.com/google/go-cmp v0.5.5 // indirect github.com/google/gofuzz v1.1.0 // indirect diff --git a/hack/run-e2e-kind.sh b/hack/run-e2e-kind.sh index ca8ac10fa..8d5596329 100755 --- a/hack/run-e2e-kind.sh +++ b/hack/run-e2e-kind.sh @@ -34,7 +34,6 @@ export CLUSTER_CONTEXT="--name test" export IMAGE_ECHOSERVER="kicbase/echo-server:1.0" export IMAGE_UBUNTU_LATEST="ubuntu:latest" export IMAGE_UBI_LATEST="registry.access.redhat.com/ubi8/ubi:latest" -export IMAGE_BUSY_BOX_LATEST="k8s.gcr.io/busybox:latest" export KIND_OPT=${KIND_OPT:=" --config ${ROOT_DIR}/hack/e2e-kind-config.yaml"} export KA_BIN=_output/bin export WAIT_TIME="20s" @@ -208,20 +207,27 @@ function kind-up-cluster { exit 1 fi - docker pull ${IMAGE_UBI_LATEST} + docker pull ${IMAGE_ECHOSERVER} if [ $? -ne 0 ] then - echo "Failed to pull ${IMAGE_UBI_LATEST}" + echo "Failed to pull ${IMAGE_ECHOSERVER}" exit 1 fi - - docker pull ${IMAGE_BUSY_BOX_LATEST} + + docker pull ${IMAGE_UBUNTU_LATEST} if [ $? -ne 0 ] then - echo "Failed to pull ${IMAGE_BUSY_BOX_LATEST}" + echo "Failed to pull ${IMAGE_UBUNTU_LATEST}" exit 1 fi - + + docker pull ${IMAGE_UBI_LATEST} + if [ $? -ne 0 ] + then + echo "Failed to pull ${IMAGE_UBI_LATEST}" + exit 1 + fi + if [[ "$MCAD_IMAGE_PULL_POLICY" = "Always" ]] then docker pull ${IMAGE_MCAD} @@ -238,7 +244,7 @@ function kind-up-cluster { fi docker images - for image in ${IMAGE_ECHOSERVER} ${IMAGE_UBUNTU_LATEST} ${IMAGE_MCAD} ${IMAGE_UBI_LATEST} ${IMAGE_BUSY_BOX_LATEST} + for image in ${IMAGE_ECHOSERVER} ${IMAGE_UBUNTU_LATEST} ${IMAGE_MCAD} ${IMAGE_UBI_LATEST} do kind load docker-image ${image} ${CLUSTER_CONTEXT} if [ $? -ne 0 ] @@ -324,6 +330,8 @@ function mcad-quota-management-down { echo "Failed to undeploy controller" exit 1 fi + echo "Waiting for the test namespace to be cleaned up.." 
+ sleep 60 } function mcad-up { @@ -394,4 +402,4 @@ setup-mcad-env kuttl-tests mcad-quota-management-down mcad-up -go test ./test/e2e -v -timeout 130m -count=1 +go test ./test/e2e -v -timeout 120m -count=1 \ No newline at end of file diff --git a/pkg/apis/controller/v1beta1/appwrapper.go b/pkg/apis/controller/v1beta1/appwrapper.go index 408404ebe..01bb87b32 100644 --- a/pkg/apis/controller/v1beta1/appwrapper.go +++ b/pkg/apis/controller/v1beta1/appwrapper.go @@ -101,7 +101,7 @@ type AppWrapperService struct { } // AppWrapperResource is App Wrapper aggregation resource -// todo: To be depricated +//todo: To be depricated type AppWrapperResource struct { metav1.TypeMeta `json:",inline"` metav1.ObjectMeta `json:"metadata"` @@ -246,7 +246,7 @@ type AppWrapperStatus struct { // Microsecond level timestamp when controller first sees QueueJob (by Informer) ControllerFirstTimestamp metav1.MicroTime `json:"controllerfirsttimestamp,omitempty"` - // Microsecond level timestamp when controller first dispatches appwrapper + // Microsecond level timestamp when controller first sets appwrapper in state Running ControllerFirstDispatchTimestamp metav1.MicroTime `json:"controllerfirstdispatchtimestamp,omitempty"` // Tell Informer to ignore this update message (do not generate a controller event) @@ -264,25 +264,18 @@ type AppWrapperStatus struct { // Represents the latest available observations of pods under appwrapper PendingPodConditions []PendingPodSpec `json:"pendingpodconditions"` - // Represents the number of cpu consumed by all pods belonging to an appwrapper. + //Resources consumed + TotalCPU float64 `json:"totalcpu,omitempty"` - // Represents the amount of memory consumed by all pods belonging to an appwrapper. TotalMemory float64 `json:"totalmemory,omitempty"` - // Represents the total number of GPUs consumed by all pods belonging to an appwrapper. 
TotalGPU int64 `json:"totalgpu,omitempty"` - - // Field to keep track of total number of seconds spent in requeueing - RequeueingTimeInSeconds int `json:"requeueingTimeInSeconds,omitempty"` - - // Field to keep track of how many times a requeuing event has been triggered - NumberOfRequeueings int `json:"numberOfRequeueings,omitempty"` } type AppWrapperState string -// enqueued, active, deleting, succeeded, failed +//enqueued, active, deleting, succeeded, failed const ( AppWrapperStateEnqueued AppWrapperState = "Pending" AppWrapperStateActive AppWrapperState = "Running" diff --git a/pkg/controller/clusterstate/cache/cache.go b/pkg/controller/clusterstate/cache/cache.go index e011438e1..9ae976e0e 100644 --- a/pkg/controller/clusterstate/cache/cache.go +++ b/pkg/controller/clusterstate/cache/cache.go @@ -334,7 +334,7 @@ func (sc *ClusterStateCache) updateState() error { } func (sc *ClusterStateCache) deleteJob(job *api.JobInfo) { - klog.V(10).Infof("[deleteJob] Attempting to delete Job <%v:%v/%v>", job.UID, job.Namespace, job.Name) + klog.V(4).Infof("[deleteJob] Attempting to delete Job <%v:%v/%v>", job.UID, job.Namespace, job.Name) time.AfterFunc(5*time.Second, func() { sc.deletedJobs.AddIfNotPresent(job) diff --git a/pkg/controller/metrics/adapter/adapter.go b/pkg/controller/metrics/adapter/adapter.go index 57b089726..ae3bede31 100644 --- a/pkg/controller/metrics/adapter/adapter.go +++ b/pkg/controller/metrics/adapter/adapter.go @@ -32,40 +32,39 @@ package adapter import ( "flag" - "net/http" - "os" - "github.com/project-codeflare/multi-cluster-app-dispatcher/cmd/kar-controllers/app/options" openapinamer "k8s.io/apiserver/pkg/endpoints/openapi" genericapiserver "k8s.io/apiserver/pkg/server" + "net/http" + "os" "github.com/emicklei/go-restful" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/rest" "k8s.io/klog/v2" + adapterprov "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/metrics/adapter/provider" "github.com/kubernetes-sigs/custom-metrics-apiserver/pkg/apiserver" basecmd "github.com/kubernetes-sigs/custom-metrics-apiserver/pkg/cmd" "github.com/kubernetes-sigs/custom-metrics-apiserver/pkg/provider" generatedopenapi "github.com/kubernetes-sigs/custom-metrics-apiserver/test-adapter/generated/openapi" - adapterprov "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/metrics/adapter/provider" clusterstatecache "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/clusterstate/cache" ) // New returns a Cache implementation. 
-func New(serverOptions *options.ServerOption, config *rest.Config, clusterStateCache clusterstatecache.Cache) *MetricsAdapter { +func New(serverOptions *options.ServerOption, config *rest.Config, clusterStateCache clusterstatecache.Cache) *MetricsAdpater { return newMetricsAdpater(serverOptions, config, clusterStateCache) } -type MetricsAdapter struct { +type MetricsAdpater struct { basecmd.AdapterBase // Message is printed on succesful startup Message string } -func (a *MetricsAdapter) makeProviderOrDie(clusterStateCache clusterstatecache.Cache) (provider.MetricsProvider, *restful.WebService) { +func (a *MetricsAdpater) makeProviderOrDie(clusterStateCache clusterstatecache.Cache) (provider.MetricsProvider, *restful.WebService) { klog.Infof("[makeProviderOrDie] Entered makeProviderOrDie()") client, err := a.DynamicClient() if err != nil { @@ -80,7 +79,7 @@ func (a *MetricsAdapter) makeProviderOrDie(clusterStateCache clusterstatecache.C return adapterprov.NewFakeProvider(client, mapper, clusterStateCache) } -func covertServerOptionsToMetricsServerOptions(serverOptions *options.ServerOption) []string { +func covertServerOptionsToMetricsServerOptions(serverOptions *options.ServerOption) []string{ var portedArgs = make([]string, 0) if serverOptions == nil { return portedArgs @@ -92,10 +91,11 @@ func covertServerOptionsToMetricsServerOptions(serverOptions *options.ServerOpti } return portedArgs } -func newMetricsAdpater(serverOptions *options.ServerOption, config *rest.Config, clusterStateCache clusterstatecache.Cache) *MetricsAdapter { +func newMetricsAdpater(serverOptions *options.ServerOption, config *rest.Config, clusterStateCache clusterstatecache.Cache) *MetricsAdpater { klog.V(10).Infof("[newMetricsAdpater] Entered newMetricsAdpater()") - cmd := &MetricsAdapter{} + cmd := &MetricsAdpater{ + } cmd.OpenAPIConfig = genericapiserver.DefaultOpenAPIConfig(generatedopenapi.GetOpenAPIDefinitions, openapinamer.NewDefinitionNamer(apiserver.Scheme)) cmd.OpenAPIConfig.Info.Title = "MetricsAdpater" diff --git a/pkg/controller/queuejob/active_appwrapper.go b/pkg/controller/queuejob/active_appwrapper.go new file mode 100644 index 000000000..1aabf70b9 --- /dev/null +++ b/pkg/controller/queuejob/active_appwrapper.go @@ -0,0 +1,54 @@ +package queuejob + +import ( + "strings" + "sync" + + arbv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1" +) + +// ActiveAppWrapper is current scheduling AppWrapper in the XController struct. 
+// Its sole purpose is provide a thread safe way to for use the XController logic +type ActiveAppWrapper struct { + activeAW *arbv1.AppWrapper + activeAWMutex *sync.RWMutex +} + +// NewActiveAppWrapper +func NewActiveAppWrapper() *ActiveAppWrapper { + return &ActiveAppWrapper{ + activeAW: nil, + activeAWMutex: &sync.RWMutex{}, + } +} + +// AtomicSet as is name implies, atomically sets the activeAW to the new value +func (aw *ActiveAppWrapper) AtomicSet(newValue *arbv1.AppWrapper) { + aw.activeAWMutex.Lock() + defer aw.activeAWMutex.Unlock() + aw.activeAW = newValue +} + +// IsActiveAppWrapper safely performs the comparison that was done inside the if block +// at line 1977 in the queuejob_controller_ex.go +// The code looked like this: +// +// if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && +// !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { +// // One more check to ensure AW is not the current active schedule object +// if cc.schedulingAW == nil || +// (strings.Compare(cc.schedulingAW.Namespace, qj.Namespace) != 0 && +// strings.Compare(cc.schedulingAW.Name, qj.Name) != 0) { +// cc.qjqueue.AddIfNotPresent(qj) +// klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper %s%s - added to active queue, Status=%+v", +// qj.Namespace, qj.Name, qj.Status) +// return nil +// } +// } +func (aw *ActiveAppWrapper) IsActiveAppWrapper(name, namespace string) bool { + aw.activeAWMutex.RLock() + defer aw.activeAWMutex.RUnlock() + return aw.activeAW == nil || + (strings.Compare(aw.activeAW.Namespace, namespace) != 0 && + strings.Compare(aw.activeAW.Name, name) != 0) +} diff --git a/pkg/controller/queuejob/active_appwrapper_test.go b/pkg/controller/queuejob/active_appwrapper_test.go new file mode 100644 index 000000000..250ac1798 --- /dev/null +++ b/pkg/controller/queuejob/active_appwrapper_test.go @@ -0,0 +1,43 @@ +package queuejob_test + +import ( + "github.com/onsi/ginkgo" + "github.com/onsi/gomega" + arbv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1" + "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/queuejob" + v1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var _ = ginkgo.Describe("Active App Wrapper Tests", func() { + var activeAppWrapper *queuejob.ActiveAppWrapper + ginkgo.When("Checking if is active app wrapper", func() { + ginkgo.BeforeEach(func() { + activeAppWrapper = queuejob.NewActiveAppWrapper() + }) + ginkgo.It("should return 'true' for a nil active app wrapper", func() { + gomega.Expect(activeAppWrapper.IsActiveAppWrapper("an-appwrapper-name", "unit-test-namespace"), + gomega.BeTrue()) + }) + ginkgo.It("should return 'true' for a the same app wrapper name and namespace", func() { + activeAppWrapper.AtomicSet(&arbv1.AppWrapper{ + ObjectMeta: v1.ObjectMeta{ + Name: "an-appwrapper-name", + Namespace: "unit-test-namespace", + }, + }) + gomega.Expect(activeAppWrapper.IsActiveAppWrapper("an-appwrapper-name", "unit-test-namespace"), + gomega.BeTrue()) + }) + ginkgo.It("should return 'false' for a the same app wrapper name and namespace", func() { + activeAppWrapper.AtomicSet(&arbv1.AppWrapper{ + ObjectMeta: v1.ObjectMeta{ + Name: "an-appwrapper-name", + Namespace: "unit-test-namespace", + }, + }) + gomega.Expect(activeAppWrapper.IsActiveAppWrapper("another-appwrapper-name", "other-unit-test-namespace"), + gomega.BeTrue()) + }) + }) + +}) diff --git a/pkg/controller/queuejob/queuejob_controller_ex.go b/pkg/controller/queuejob/queuejob_controller_ex.go index 6290bcdea..ed34cd8eb 
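For readers of this diff: the new ActiveAppWrapper file above replaces the controller's bare schedulingAW field plus its separate sync.RWMutex (removed from XController further down) with a single guarded value that ScheduleNext sets via AtomicSet right after popping from the active queue. A minimal stand-alone sketch of the guard pattern, using a simplified stand-in type instead of *arbv1.AppWrapper:

package main

import (
	"fmt"
	"sync"
)

// awRef is a hypothetical stand-in for *arbv1.AppWrapper carrying only the
// fields the comparison needs.
type awRef struct{ Name, Namespace string }

// activeGuard mirrors the shape of ActiveAppWrapper: one value, one RWMutex.
type activeGuard struct {
	mu     sync.RWMutex
	active *awRef
}

// Set corresponds to AtomicSet: swap in the wrapper currently being scheduled.
func (g *activeGuard) Set(a *awRef) {
	g.mu.Lock()
	defer g.mu.Unlock()
	g.active = a
}

// NotCurrentlyScheduling mirrors IsActiveAppWrapper: true when nothing is
// active, or when both the namespace and the name differ from the active one.
func (g *activeGuard) NotCurrentlyScheduling(name, namespace string) bool {
	g.mu.RLock()
	defer g.mu.RUnlock()
	return g.active == nil ||
		(g.active.Namespace != namespace && g.active.Name != name)
}

func main() {
	g := &activeGuard{}
	fmt.Println(g.NotCurrentlyScheduling("aw-1", "ns-a")) // true: nothing active yet
	g.Set(&awRef{Name: "aw-1", Namespace: "ns-a"})
	fmt.Println(g.NotCurrentlyScheduling("aw-1", "ns-a")) // false: this AW is the one being scheduled
}

Note that, as in the comparison quoted from manageQueueJob in the doc comment above, the check uses a logical AND, so a wrapper that shares only the name or only the namespace with the active one is still treated as the active wrapper.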
100644 --- a/pkg/controller/queuejob/queuejob_controller_ex.go +++ b/pkg/controller/queuejob/queuejob_controller_ex.go @@ -39,12 +39,9 @@ import ( "sort" "strconv" "strings" - "sync" "time" - "github.com/eapache/go-resiliency/retrier" "github.com/gogo/protobuf/proto" - "github.com/hashicorp/go-multierror" qmutils "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/quotaplugins/util" "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/quota/quotaforestmanager" @@ -54,8 +51,8 @@ import ( "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/metrics/adapter" "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/controller/quota" "k8s.io/apimachinery/pkg/api/equality" - apierrors "k8s.io/apimachinery/pkg/api/errors" + apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" @@ -108,6 +105,9 @@ const ( ControllerUIDLabel string = "controller-uid" ) +// controllerKind contains the schema.GroupVersionKind for this controller type. +var controllerKind = arbv1.SchemeGroupVersion.WithKind("AppWrapper") + // XController the AppWrapper Controller type type XController struct { config *rest.Config @@ -159,7 +159,7 @@ type XController struct { dispatchMap map[string]string // Metrics API Server - metricsAdapter *adapter.MetricsAdapter + metricsAdapter *adapter.MetricsAdpater // EventQueueforAgent agentEventQueue *cache.FIFO @@ -168,8 +168,7 @@ type XController struct { quotaManager quota.QuotaManagerInterface // Active Scheduling AppWrapper - schedulingAW *arbv1.AppWrapper - schedulingMutex sync.RWMutex + schedulingAW *ActiveAppWrapper } type JobAndClusterAgent struct { @@ -184,7 +183,7 @@ func NewJobAndClusterAgent(qjKey string, qaKey string) *JobAndClusterAgent { } } -// RegisterAllQueueJobResourceTypes - registers all resources +// RegisterAllQueueJobResourceTypes - gegisters all resources func RegisterAllQueueJobResourceTypes(regs *queuejobresources.RegisteredResources) { respod.Register(regs) resservice.Register(regs) @@ -228,7 +227,7 @@ func NewJobController(config *rest.Config, serverOption *options.ServerOption) * updateQueue: cache.NewFIFO(GetQueueJobKey), qjqueue: NewSchedulingQueue(), cache: clusterstatecache.New(config), - schedulingAW: nil, + schedulingAW: NewActiveAppWrapper(), } cc.metricsAdapter = adapter.New(serverOption, config, cc.cache) @@ -436,13 +435,11 @@ func (qjm *XController) PreemptQueueJobs() { var updateNewJob *arbv1.AppWrapper var message string - newjob, err := qjm.getAppWrapper(aw.Namespace, aw.Name, "[PreemptQueueJobs] get fresh app wrapper") - if err != nil { - klog.Warningf("[PreemptQueueJobs] failed in retrieving a fresh copy of the app wrapper '%s/%s', err=%v. Will try to preempt on the next run.", aw.Namespace, aw.Name, err) + newjob, e := qjm.queueJobLister.AppWrappers(aw.Namespace).Get(aw.Name) + if e != nil { continue } newjob.Status.CanRun = false - newjob.Status.FilterIgnore = true // update QueueJobState only cleanAppWrapper := false //If dispatch deadline is exceeded no matter what the state of AW, kill the job and set status as Failed. 
if (aw.Status.State == arbv1.AppWrapperStateActive) && (aw.Spec.SchedSpec.DispatchDuration.Limit > 0) { @@ -461,14 +458,11 @@ func (qjm *XController) PreemptQueueJobs() { newjob.Status.QueueJobState = arbv1.AppWrapperCondFailed newjob.Status.Running = 0 updateNewJob = newjob.DeepCopy() - - err := qjm.updateStatusInEtcdWithRetry(updateNewJob, "PreemptQueueJobs - CanRun: false -- DispatchDeadlineExceeded") - if err != nil { - klog.Warningf("[PreemptQueueJobs] status update CanRun: false -- DispatchDeadlineExceeded for '%s/%s' failed", aw.Namespace, aw.Name) - continue + if err := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { + klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", aw.Namespace, aw.Name, err) } //cannot use cleanup AW, since it puts AW back in running state - go qjm.qjqueue.AddUnschedulableIfNotPresent(updateNewJob) + go qjm.qjqueue.AddUnschedulableIfNotPresent(aw) //Move to next AW continue @@ -486,22 +480,15 @@ func (qjm *XController) PreemptQueueJobs() { newjob.Status.Conditions[index] = *cond.DeepCopy() } - if aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds == 0 { - aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds = aw.Spec.SchedSpec.Requeuing.TimeInSeconds - } if aw.Spec.SchedSpec.Requeuing.GrowthType == "exponential" { - if newjob.Status.RequeueingTimeInSeconds == 0 { - newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.TimeInSeconds - } else { - newjob.Status.RequeueingTimeInSeconds += newjob.Status.RequeueingTimeInSeconds - } + newjob.Spec.SchedSpec.Requeuing.TimeInSeconds += aw.Spec.SchedSpec.Requeuing.TimeInSeconds } else if aw.Spec.SchedSpec.Requeuing.GrowthType == "linear" { - newjob.Status.RequeueingTimeInSeconds += aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds + newjob.Spec.SchedSpec.Requeuing.TimeInSeconds += aw.Spec.SchedSpec.Requeuing.InitialTimeInSeconds } if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds > 0 { - if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds <= newjob.Status.RequeueingTimeInSeconds { - newjob.Status.RequeueingTimeInSeconds = aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds + if aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds <= newjob.Spec.SchedSpec.Requeuing.TimeInSeconds { + newjob.Spec.SchedSpec.Requeuing.TimeInSeconds = aw.Spec.SchedSpec.Requeuing.MaxTimeInSeconds } } @@ -509,7 +496,7 @@ func (qjm *XController) PreemptQueueJobs() { newjob.Status.State = arbv1.AppWrapperStateDeleted cleanAppWrapper = true } else { - newjob.Status.NumberOfRequeueings += 1 + newjob.Spec.SchedSpec.Requeuing.NumRequeuings += 1 } updateNewJob = newjob.DeepCopy() @@ -529,21 +516,17 @@ func (qjm *XController) PreemptQueueJobs() { updateNewJob = newjob.DeepCopy() } - - err = qjm.updateStatusInEtcdWithRetry(updateNewJob, "PreemptQueueJobs - CanRun: false -- MinPodsNotRunning") - if err != nil { - klog.Warningf("[PreemptQueueJobs] status update for '%s/%s' failed, skipping app wrapper err =%v", aw.Namespace, aw.Name, err) - continue + if err := qjm.updateEtcd(updateNewJob, "PreemptQueueJobs - CanRun: false"); err != nil { + klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", aw.Namespace, aw.Name, err) } - if cleanAppWrapper { - klog.V(4).Infof("[PreemptQueueJobs] Deleting AppWrapper %s/%s due to maximum number of re-queueing(s) exceeded.", aw.Name, aw.Namespace) - go qjm.Cleanup(updateNewJob) + klog.V(4).Infof("[PreemptQueueJobs] Deleting AppWrapper %s/%s due to maximum number of requeuings exceeded.", aw.Name, aw.Namespace) + go qjm.Cleanup(aw) } else { + klog.V(4).Infof("[PreemptQueueJobs] Adding 
preempted AppWrapper %s/%s to backoff queue.", aw.Name, aw.Namespace) //Only back-off AWs that are in state running and not in state Failed if updateNewJob.Status.State != arbv1.AppWrapperStateFailed { - klog.Infof("[PreemptQueueJobs] Adding preempted AppWrapper %s/%s to back off queue.", aw.Name, aw.Namespace) - go qjm.backoff(updateNewJob, "PreemptionTriggered", string(message)) + go qjm.backoff(aw, "PreemptionTriggered", string(message)) } } } @@ -555,24 +538,16 @@ func (qjm *XController) preemptAWJobs(preemptAWs []*arbv1.AppWrapper) { } for _, aw := range preemptAWs { - apiCacheAWJob, err := qjm.getAppWrapper(aw.Namespace, aw.Name, "[preemptAWJobs] get fresh app wrapper") - if err != nil { - if apierrors.IsNotFound(err) { - klog.Warningf("[preemptAWJobs] App wrapper '%s/%s' was not found when getting a fresh copy. ", aw.Namespace, aw.Name) - continue - } - klog.Errorf("[preemptAWJobs] Failed to get AppWrapper to from API Cache %s/%s: err = %v", - aw.Namespace, aw.Name, err) + apiCacheAWJob, e := qjm.queueJobLister.AppWrappers(aw.Namespace).Get(aw.Name) + if e != nil { + klog.Errorf("[preemptQWJobs] Failed to get AppWrapper to from API Cache %v/%v: %v", + aw.Namespace, aw.Name, e) continue } apiCacheAWJob.Status.CanRun = false - err = qjm.updateStatusInEtcdWithRetry(apiCacheAWJob, "preemptAWJobs - CanRun: false") - if err != nil { - if apierrors.IsNotFound(err) { - klog.Warningf("[preemptAWJobs] App wrapper '%s/%s' was not found when updating status. ", aw.Namespace, aw.Name) - continue - } - klog.Warningf("[preemptAWJobs] status update for '%s/%s' failed, err=%v", aw.Namespace, aw.Name, err) + if err := qjm.updateEtcd(apiCacheAWJob, "preemptAWJobs - CanRun: false"); err != nil { + klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", + apiCacheAWJob.Namespace, apiCacheAWJob.Name, err) } } } @@ -637,15 +612,8 @@ func (qjm *XController) GetQueueJobsEligibleForPreemption() []*arbv1.AppWrapper } else { condition = lastCondition } - var requeuingTimeInSeconds int - if value.Status.RequeueingTimeInSeconds > 0 { - requeuingTimeInSeconds = value.Status.RequeueingTimeInSeconds - } else if value.Spec.SchedSpec.Requeuing.InitialTimeInSeconds == 0 { - requeuingTimeInSeconds = value.Spec.SchedSpec.Requeuing.TimeInSeconds - } else { - requeuingTimeInSeconds = value.Spec.SchedSpec.Requeuing.InitialTimeInSeconds - } + requeuingTimeInSeconds := value.Spec.SchedSpec.Requeuing.TimeInSeconds minAge := condition.LastTransitionMicroTime.Add(time.Duration(requeuingTimeInSeconds) * time.Second) currentTime := time.Now() @@ -653,15 +621,19 @@ func (qjm *XController) GetQueueJobsEligibleForPreemption() []*arbv1.AppWrapper continue } + if value.Spec.SchedSpec.Requeuing.InitialTimeInSeconds == 0 { + value.Spec.SchedSpec.Requeuing.InitialTimeInSeconds = value.Spec.SchedSpec.Requeuing.TimeInSeconds + } + if replicas > 0 { - klog.V(3).Infof("AppWrapper '%s/%s' is eligible for preemption Running: %d - minAvailable: %d , Succeeded: %d !!!", value.Namespace, value.Name, value.Status.Running, replicas, value.Status.Succeeded) + klog.V(3).Infof("AppWrapper %s is eligible for preemption Running: %v - minAvailable: %v , Succeeded: %v !!! \n", value.Name, value.Status.Running, replicas, value.Status.Succeeded) qjobs = append(qjobs, value) } } else { // Preempt when schedulingSpec stanza is not set but pods fails scheduling. 
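As context for the requeuing hunks above: with this change the back-off bookkeeping lives in Spec.SchedSpec.Requeuing rather than in Status, with exponential growth doubling TimeInSeconds on each preemption, linear growth adding InitialTimeInSeconds, MaxTimeInSeconds capping the wait, and NumRequeuings counting attempts. A self-contained sketch of that arithmetic under those assumptions (the struct below is a trimmed stand-in, not the project API):

package main

import "fmt"

// requeuing is a trimmed stand-in for SchedSpec.Requeuing with just the
// fields the growth logic in PreemptQueueJobs touches.
type requeuing struct {
	GrowthType           string
	InitialTimeInSeconds int
	TimeInSeconds        int
	MaxTimeInSeconds     int
	NumRequeuings        int
}

// grow applies one preemption event: exponential doubles the current wait,
// linear adds the initial wait, and MaxTimeInSeconds (when set) caps the result.
func grow(r *requeuing) {
	if r.InitialTimeInSeconds == 0 {
		r.InitialTimeInSeconds = r.TimeInSeconds
	}
	switch r.GrowthType {
	case "exponential":
		r.TimeInSeconds += r.TimeInSeconds
	case "linear":
		r.TimeInSeconds += r.InitialTimeInSeconds
	}
	if r.MaxTimeInSeconds > 0 && r.MaxTimeInSeconds <= r.TimeInSeconds {
		r.TimeInSeconds = r.MaxTimeInSeconds
	}
	r.NumRequeuings++
}

func main() {
	r := &requeuing{GrowthType: "exponential", TimeInSeconds: 60, MaxTimeInSeconds: 300}
	for i := 0; i < 4; i++ {
		grow(r)
		fmt.Printf("requeue %d: wait %ds\n", r.NumRequeuings, r.TimeInSeconds)
	}
}

With an initial wait of 60s, exponential growth, and a 300s cap, successive preemptions yield waits of 120s, 240s, 300s, 300s.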
// ignore co-scheduler pods if len(value.Status.PendingPodConditions) > 0 { - klog.V(3).Infof("AppWrapper '%s/%s' is eligible for preemption Running: %d , Succeeded: %d due to failed scheduling !!!", value.Namespace, value.Status.Running, value.Status.Succeeded) + klog.V(3).Infof("AppWrapper %s is eligible for preemption Running: %v , Succeeded: %v due to failed scheduling !!! \n", value.Name, value.Status.Running, value.Status.Succeeded) qjobs = append(qjobs, value) } } @@ -686,7 +658,7 @@ func GetPodTemplate(qjobRes *arbv1.AppWrapperResource) (*v1.PodTemplateSpec, err template, ok := obj.(*v1.PodTemplate) if !ok { - return nil, fmt.Errorf("resource template not define as a PodTemplate") + return nil, fmt.Errorf("Resource template not define as a PodTemplate") } return &template.Template, nil @@ -712,7 +684,7 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb // Get all pods and related resources countCompletionRequired := 0 - for i, genericItem := range caw.Spec.AggrResources.GenericItems { + for _, genericItem := range caw.Spec.AggrResources.GenericItems { if len(genericItem.CompletionStatus) > 0 { objectName := genericItem.GenericTemplate var unstruct unstructured.Unstructured @@ -730,12 +702,12 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb } } if len(name) == 0 { - klog.Warningf("[getAppWrapperCompletionStatus] object name not present for appwrapper: '%s/%s", caw.Namespace, caw.Name) + klog.Warningf("[getAppWrapperCompletionStatus] object name not present for appwrapper: %v in namespace: %v", caw.Name, caw.Namespace) } - klog.V(4).Infof("[getAppWrapperCompletionStatus] Checking if item %d named %s completed for appwrapper: '%s/%s'...", i+1, name, caw.Namespace, caw.Name) + klog.Infof("[getAppWrapperCompletionStatus] Checking items completed for appwrapper: %v in namespace: %v", caw.Name, caw.Namespace) + status := qjm.genericresources.IsItemCompleted(&genericItem, caw.Namespace, caw.Name, name) if !status { - klog.V(4).Infof("[getAppWrapperCompletionStatus] Item %d named %s not completed for appwrapper: '%s/%s'", i+1, name, caw.Namespace, caw.Name) //early termination because a required item is not completed return caw.Status.State } @@ -745,7 +717,7 @@ func (qjm *XController) getAppWrapperCompletionStatus(caw *arbv1.AppWrapper) arb } } - klog.V(4).Infof("[getAppWrapperCompletionStatus] App wrapper '%s/%s' countCompletionRequired %d, podsRunning %d, podsPending %d", caw.Namespace, caw.Name, countCompletionRequired, caw.Status.Running, caw.Status.Pending) + klog.V(4).Infof("[getAppWrapperCompletionStatus] countCompletionRequired %v, podsRunning %v, podsPending %v", countCompletionRequired, caw.Status.Running, caw.Status.Pending) //Set new status only when completion required flag is present in genericitems array if countCompletionRequired > 0 { @@ -805,7 +777,7 @@ func (qjm *XController) getProposedPreemptions(requestingJob *arbv1.AppWrapper, //Sort keys of map priorityKeyValues := make([]float64, len(preemptableAWs)) i := 0 - for key := range preemptableAWs { + for key, _ := range preemptableAWs { priorityKeyValues[i] = key i++ } @@ -833,7 +805,7 @@ func (qjm *XController) getProposedPreemptions(requestingJob *arbv1.AppWrapper, } } - if !foundEnoughResources { + if foundEnoughResources == false { klog.V(10).Infof("[getProposedPreemptions] Not enought preemptable jobs to dispatch %s.", requestingJob.Name) } @@ -887,7 +859,7 @@ func (qjm *XController) getDispatchedAppWrappers() (map[string]*clusterstateapi. 
for _, aw := range appwrappers { // Get dispatched jobs - if aw.Status.CanRun { + if aw.Status.CanRun == true { id := qmutils.CreateId(aw.Namespace, aw.Name) awrRetVal[id] = qjm.GetAggregatedResources(aw) awsRetVal[id] = aw @@ -1003,6 +975,8 @@ func (qjm *XController) getAggregatedAvailableResourcesPriority(unallocatedClust } klog.V(6).Infof("[getAggAvaiResPri] The value of pending is %v", pending) continue + } else { + //Do nothing } } @@ -1111,436 +1085,425 @@ func (qjm *XController) ScheduleNext() { if err != nil { klog.Errorf("[ScheduleNext] Cannot pop QueueJob from qjqueue! err=%#v", err) return // Try to pop qjqueue again + } else { + klog.Infof("[ScheduleNext] activeQ.Pop %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), qj, qj.ResourceVersion, qj.Status) } - qjm.schedulingMutex.Lock() - qjm.schedulingAW = qj - qjm.schedulingMutex.Unlock() - // ensure that current active appwrapper is reset at the end of this function, to prevent - // the appwrapper from being added in syncjob - defer qjm.schedulingAWAtomicSet(nil) - - scheduleNextRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) - scheduleNextRetrier.SetJitter(0.05) - //Retry the execution - err = scheduleNextRetrier.Run(func() error { - klog.Infof("[ScheduleNext] activeQ.Pop %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), qj, - qj.ResourceVersion, qj.Status) - - apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] -- get fresh copy after queue pop") - if retryErr != nil { - if apierrors.IsNotFound(retryErr) { - klog.Warningf("[ScheduleNext] app wrapper '%s/%s' not found skiping dispatch", qj.Namespace, qj.Name) - return nil - } - klog.Errorf("[ScheduleNext] Unable to get AW %s from API cache &aw=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, retryErr) - return retryErr - } - // make sure qj has the latest information - if larger(apiCacheAWJob.ResourceVersion, qj.ResourceVersion) { - klog.V(10).Infof("[ScheduleNext] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) - klog.V(10).Infof("[ScheduleNext] '%s/%s' found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", apiCacheAWJob.Namespace, apiCacheAWJob.Name, apiCacheAWJob, apiCacheAWJob) - apiCacheAWJob.DeepCopyInto(qj) + + apiCacheAWJob, e := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) + // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level + if e != nil { + klog.Errorf("[ScheduleNext] Unable to get AW %s from API cache &aw=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) + return + } + // make sure qj has the latest information + if larger(apiCacheAWJob.ResourceVersion, qj.ResourceVersion) { + klog.V(10).Infof("[ScheduleNext] %s found more recent copy from cache &qj=%p qj=%+v", qj.Name, qj, qj) + klog.V(10).Infof("[ScheduleNext] %s found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", apiCacheAWJob.Name, apiCacheAWJob, apiCacheAWJob) + apiCacheAWJob.DeepCopyInto(qj) + } + qjm.schedulingAW.AtomicSet(qj) + + // Re-compute SystemPriority for DynamicPriority policy + if qjm.serverOption.DynamicPriority { + klog.V(4).Info("[ScheduleNext] dynamic priority enabled") + // Create 
newHeap to temporarily store qjqueue jobs for updating SystemPriority + tempQ := newHeap(cache.MetaNamespaceKeyFunc, HigherSystemPriorityQJ) + qj.Status.SystemPriority = float64(qj.Spec.Priority) + qj.Spec.PrioritySlope*(time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time)).Seconds() + tempQ.Add(qj) + for qjm.qjqueue.Length() > 0 { + qjtemp, _ := qjm.qjqueue.Pop() + qjtemp.Status.SystemPriority = float64(qjtemp.Spec.Priority) + qjtemp.Spec.PrioritySlope*(time.Now().Sub(qjtemp.Status.ControllerFirstTimestamp.Time)).Seconds() + tempQ.Add(qjtemp) } - if qj.Status.CanRun { - klog.V(4).Infof("[ScheduleNext] AppWrapper '%s/%s' from priority queue is already scheduled. Ignoring request: Status=%+v", qj.Namespace, qj.Name, qj.Status) - return nil + // move AppWrappers back to activeQ and sort based on SystemPriority + for tempQ.data.Len() > 0 { + qjtemp, _ := tempQ.Pop() + qjm.qjqueue.AddIfNotPresent(qjtemp.(*arbv1.AppWrapper)) } - - // Re-compute SystemPriority for DynamicPriority policy - if qjm.serverOption.DynamicPriority { - klog.V(4).Info("[ScheduleNext] dynamic priority enabled") - // Create newHeap to temporarily store qjqueue jobs for updating SystemPriority - tempQ := newHeap(cache.MetaNamespaceKeyFunc, HigherSystemPriorityQJ) - qj.Status.SystemPriority = float64(qj.Spec.Priority) + qj.Spec.PrioritySlope*(time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time)).Seconds() - tempQ.Add(qj) - for qjm.qjqueue.Length() > 0 { - qjtemp, _ := qjm.qjqueue.Pop() - qjtemp.Status.SystemPriority = float64(qjtemp.Spec.Priority) + qjtemp.Spec.PrioritySlope*(time.Now().Sub(qjtemp.Status.ControllerFirstTimestamp.Time)).Seconds() - tempQ.Add(qjtemp) - } - // move AppWrappers back to activeQ and sort based on SystemPriority - for tempQ.data.Len() > 0 { - qjtemp, _ := tempQ.Pop() - qjm.qjqueue.AddIfNotPresent(qjtemp.(*arbv1.AppWrapper)) - } - // Print qjqueue.ativeQ for debugging - if klog.V(4).Enabled() { - pq := qjm.qjqueue.(*PriorityQueue) - if qjm.qjqueue.Length() > 0 { - for key, element := range pq.activeQ.data.items { - qjtemp := element.obj.(*arbv1.AppWrapper) - klog.V(4).Infof("[ScheduleNext] AfterCalc: qjqLength=%d Key=%s index=%d Priority=%.1f SystemPriority=%.1f QueueJobState=%s", - qjm.qjqueue.Length(), key, element.index, float64(qjtemp.Spec.Priority), qjtemp.Status.SystemPriority, qjtemp.Status.QueueJobState) - } + // Print qjqueue.ativeQ for debugging + if klog.V(4).Enabled() { + pq := qjm.qjqueue.(*PriorityQueue) + if qjm.qjqueue.Length() > 0 { + for key, element := range pq.activeQ.data.items { + qjtemp := element.obj.(*arbv1.AppWrapper) + klog.V(4).Infof("[ScheduleNext] AfterCalc: qjqLength=%d Key=%s index=%d Priority=%.1f SystemPriority=%.1f QueueJobState=%s", + qjm.qjqueue.Length(), key, element.index, float64(qjtemp.Spec.Priority), qjtemp.Status.SystemPriority, qjtemp.Status.QueueJobState) } } + } - // Retrieve HeadOfLine after priority update - qj, retryErr = qjm.qjqueue.Pop() - if retryErr != nil { - klog.V(3).Infof("[ScheduleNext] Cannot pop QueueJob from qjqueue! err=%#v", retryErr) - return err - } + // Retrieve HeadOfLine after priority update + qj, err = qjm.qjqueue.Pop() + if err != nil { + klog.V(3).Infof("[ScheduleNext] Cannot pop QueueJob from qjqueue! 
err=%#v", err) + } else { klog.V(3).Infof("[ScheduleNext] activeQ.Pop_afterPriorityUpdate %s *Delay=%.6f seconds RemainingLength=%d &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.Length(), qj, qj.ResourceVersion, qj.Status) - apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] -- after dynamic priority pop") - if retryErr != nil { - if apierrors.IsNotFound(retryErr) { - return nil - } - klog.Errorf("[ScheduleNext] failed to get a fresh copy of the app wrapper '%s/%s', err=%#v", qj.Namespace, qj.Name, retryErr) - return err - } - if apiCacheAWJob.Status.CanRun { - klog.Infof("[ScheduleNext] AppWrapper job: %s from API is already scheduled. Ignoring request: Status=%+v", qj.Name, qj.Status) - return nil - } - apiCacheAWJob.DeepCopyInto(qj) - qjm.schedulingAWAtomicSet(qj) } + qjm.schedulingAW.AtomicSet(qj) + } + + if qj.Status.CanRun { + klog.V(4).Infof("[ScheduleNext] AppWrapper job: %s from prioirty queue is already scheduled. Ignoring request: Status=%+v\n", qj.Name, qj.Status) + return + } + apiCacheAppWrapper, err := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) + if err != nil { + klog.Errorf("[ScheduleNext] Fail get AppWrapper job: %s from the api cache, err=%#v", qj.Name, err) + return + } + if apiCacheAppWrapper.Status.CanRun { + klog.V(4).Infof("[ScheduleNext] AppWrapper job: %s from API is already scheduled. Ignoring request: Status=%+v\n", apiCacheAppWrapper.Name, apiCacheAppWrapper.Status) + return + } - qj.Status.QueueJobState = arbv1.AppWrapperCondHeadOfLine - qjm.addOrUpdateCondition(qj, arbv1.AppWrapperCondHeadOfLine, v1.ConditionTrue, "FrontOfQueue.", "") + qj.Status.QueueJobState = arbv1.AppWrapperCondHeadOfLine + qjm.addOrUpdateCondition(qj, arbv1.AppWrapperCondHeadOfLine, v1.ConditionTrue, "FrontOfQueue.", "") - qj.Status.FilterIgnore = true // update QueueJobState only - retryErr = qjm.updateStatusInEtcd(qj, "ScheduleNext - setHOL") - if retryErr != nil { - if apierrors.IsConflict(retryErr) { - klog.Warningf("[ScheduleNext] Conflict error detected when updating status in etcd for app wrapper '%s/%s, status = %+v. Retrying update.", qj.Namespace, qj.Name, qj.Status) - } else { - klog.Errorf("[ScheduleNext] Failed to updated status in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, retryErr) + qj.Status.FilterIgnore = true // update QueueJobState only + qjm.updateEtcd(qj, "ScheduleNext - setHOL") + qjm.qjqueue.AddUnschedulableIfNotPresent(qj) // working on qj, avoid other threads putting it back to activeQ + + klog.V(4).Infof("[ScheduleNext] after Pop qjqLength=%d qj %s Version=%s activeQ=%t Unsched=%t Status=%+v", qjm.qjqueue.Length(), qj.Name, qj.ResourceVersion, qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj.Status) + if qjm.isDispatcher { + klog.Infof("[ScheduleNext] [Dispatcher Mode] Dispatch Next QueueJob: %s\n", qj.Name) + } else { + klog.Infof("[ScheduleNext] [Agent Mode] Deploy Next QueueJob: %s Status=%+v\n", qj.Name, qj.Status) + } + + dispatchFailedReason := "AppWrapperNotRunnable." + dispatchFailedMessage := "" + if qjm.isDispatcher { // Dispatcher Mode + agentId := qjm.chooseAgent(qj) + if agentId != "" { // A proper agent is found. 
+ // Update states (CanRun=True) of XQJ in API Server + // Add XQJ -> Agent Map + apiQueueJob, err := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) + if err != nil { + klog.Errorf("[ScheduleNext] Fail get AppWrapper job: %s from the api cache, err=%#v", qj.Name, err) + return + } + // make sure qj has the latest information + if larger(apiQueueJob.ResourceVersion, qj.ResourceVersion) { + klog.V(10).Infof("[ScheduleNext] %s found more recent copy from cache &qj=%p qj=%+v", qj.Name, qj, qj) + klog.V(10).Infof("[ScheduleNext] %s found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", apiQueueJob.Name, apiQueueJob, apiQueueJob) + apiQueueJob.DeepCopyInto(qj) + } + + //apiQueueJob.Status.CanRun = true + qj.Status.CanRun = true + queueJobKey, _ := GetQueueJobKey(qj) + qjm.dispatchMap[queueJobKey] = agentId + klog.V(10).Infof("[TTime] %s, %s: ScheduleNextBeforeEtcd", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + qjm.updateEtcd(qj, "ScheduleNext - setCanRun") + if err := qjm.eventQueue.Add(qj); err != nil { // unsuccessful add to eventQueue, add back to activeQ + klog.Errorf("[ScheduleNext] Fail to add %s to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) + qjm.qjqueue.MoveToActiveQueueIfExists(qj) + } else { // successful add to eventQueue, remove from qjqueue + if qjm.qjqueue.IfExist(qj) { + klog.V(10).Infof("[ScheduleNext] AppWrapper %s will be deleted from priority queue and sent to event queue", qj.Name) + } + qjm.qjqueue.Delete(qj) } - return retryErr - } - qjm.qjqueue.AddUnschedulableIfNotPresent(qj) // working on qj, avoid other threads putting it back to activeQ - klog.V(4).Infof("[ScheduleNext] after Pop qjqLength=%d qj %s Version=%s activeQ=%t Unsched=%t Status=%v", qjm.qjqueue.Length(), qj.Name, qj.ResourceVersion, qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj.Status) - if qjm.isDispatcher { - klog.Infof("[ScheduleNext] [Dispatcher Mode] Attempting to dispatch next appwrapper: '%s/%s Status=%v", qj.Namespace, qj.Name, qj.Status) + //if _, err := qjm.arbclients.ArbV1().AppWrappers(qj.Namespace).Update(apiQueueJob); err != nil { + // klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", qj.Namespace, qj.Name, err) + //} + klog.V(10).Infof("[TTime] %s, %s: ScheduleNextAfterEtcd", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + return } else { - klog.Infof("[ScheduleNext] [Agent Mode] Attempting to dispatch next appwrapper: '%s/%s' Status=%v", qj.Namespace, qj.Name, qj.Status) + dispatchFailedMessage = "Cannot find an cluster with enough resources to dispatch AppWrapper." + klog.V(2).Infof("[Controller: Dispatcher Mode] %s %s\n", dispatchFailedReason, dispatchFailedMessage) + go qjm.backoff(qj, dispatchFailedReason, dispatchFailedMessage) } - - dispatchFailedReason := "AppWrapperNotRunnable." - dispatchFailedMessage := "" - if qjm.isDispatcher { // Dispatcher Mode - agentId := qjm.chooseAgent(qj) - if agentId != "" { // A proper agent is found. 
- // Update states (CanRun=True) of XQJ in API Server - // Add XQJ -> Agent Map - apiCacheAWJob, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Dispatcher Mode] get appwrapper") - if retryErr != nil { - if apierrors.IsNotFound(retryErr) { - klog.Warningf("[ScheduleNext] app wrapper '%s/%s' not found skiping dispatch", qj.Namespace, qj.Name) - return nil - } - klog.Errorf("[ScheduleNext] [Dispatcher Mode] failed to retrieve the app wrapper '%s/%s', err=%#v", qj.Namespace, qj.Name, err) - return err - } - // make sure qj has the latest information - if larger(apiCacheAWJob.ResourceVersion, qj.ResourceVersion) { - klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] App wrapper '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) - klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] App wrapper '%s/%s' found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", apiCacheAWJob.Namespace, apiCacheAWJob.Name, apiCacheAWJob, apiCacheAWJob) - apiCacheAWJob.DeepCopyInto(qj) - } - qj.Status.CanRun = true - queueJobKey, _ := GetQueueJobKey(qj) - qjm.dispatchMap[queueJobKey] = agentId - klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] %s, %s: ScheduleNextBeforeEtcd", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) - retryErr = qjm.updateStatusInEtcd(qj, "[ScheduleNext] [Dispatcher Mode] - setCanRun") - if retryErr != nil { - if apierrors.IsConflict(err) { - klog.Warningf("[ScheduleNext] [Dispatcher Mode] Conflict error detected when updating status in etcd for app wrapper '%s/%s, status = %+v. Retrying update.", qj.Namespace, qj.Name, qj.Status) - } else { - klog.Errorf("[ScheduleNext] [Dispatcher Mode] Failed to updated status in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, err) - } - return retryErr - } - if err00 := qjm.eventQueue.Add(qj); err00 != nil { // unsuccessful add to eventQueue, add back to activeQ - klog.Errorf("[ScheduleNext] [Dispatcher Mode] Fail to add %s to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) - qjm.qjqueue.MoveToActiveQueueIfExists(qj) - } else { // successful add to eventQueue, remove from qjqueue - if qjm.qjqueue.IfExist(qj) { - klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] AppWrapper %s will be deleted from priority queue and sent to event queue", qj.Name) - } - qjm.qjqueue.Delete(qj) + } else { // Agent Mode + aggqj := qjm.GetAggregatedResources(qj) + + // HeadOfLine logic + HOLStartTime := time.Now() + forwarded := false + fowardingLoopCount := 1 + quotaFits := false + // Try to forward to eventQueue for at most HeadOfLineHoldingTime + for !forwarded { + klog.Infof("[ScheduleNext] Forwarding loop iteration: %d", fowardingLoopCount) + priorityindex := qj.Status.SystemPriority + // Support for Non-Preemption + if !qjm.serverOption.Preemption { + priorityindex = -math.MaxFloat64 + } + // Disable Preemption under DynamicPriority. Comment out if allow DynamicPriority and Preemption at the same time. 
+ if qjm.serverOption.DynamicPriority { + priorityindex = -math.MaxFloat64 + } + resources, proposedPreemptions := qjm.getAggregatedAvailableResourcesPriority( + qjm.cache.GetUnallocatedResources(), priorityindex, qj, "") + klog.Infof("[ScheduleNext] XQJ %s with resources %v to be scheduled on aggregated idle resources %v", qj.Name, aggqj, resources) + + // Assume preemption will remove low priroity AWs in the system, optimistically dispatch such AWs + + if aggqj.LessEqual(resources) { + unallocatedHistogramMap := qjm.cache.GetUnallocatedHistograms() + if !qjm.nodeChecks(unallocatedHistogramMap, qj) { + klog.V(4).Infof("[ScheduleNext] Optimistic dispatch for AW %v in namespace %v requesting aggregated resources %v histogram for point in-time fragmented resources are available in the cluster %s", qj.Name, qj.Namespace, qjm.GetAggregatedResources(qj), proto.MarshalTextString(unallocatedHistogramMap["gpu"])) } - klog.V(10).Infof("[ScheduleNext] [Dispatcher Mode] %s, %s: ScheduleNextAfterEtcd", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) - return nil - } else { - dispatchFailedMessage = "Cannot find an cluster with enough resources to dispatch AppWrapper." - klog.V(2).Infof("[ScheduleNex] [Dispatcher Mode] %s %s\n", dispatchFailedReason, dispatchFailedMessage) - go qjm.backoff(qj, dispatchFailedReason, dispatchFailedMessage) - } - } else { // Agent Mode - aggqj := qjm.GetAggregatedResources(qj) - - // HeadOfLine logic - HOLStartTime := time.Now() - forwarded := false - fowardingLoopCount := 1 - quotaFits := false - // Try to forward to eventQueue for at most HeadOfLineHoldingTime - for !forwarded { - klog.V(4).Infof("[ScheduleNext] [Agent Mode] Forwarding loop iteration: %d", fowardingLoopCount) - priorityindex := qj.Status.SystemPriority - // Support for Non-Preemption - if !qjm.serverOption.Preemption { - priorityindex = -math.MaxFloat64 - } - // Disable Preemption under DynamicPriority. Comment out if allow DynamicPriority and Preemption at the same time. - if qjm.serverOption.DynamicPriority { - priorityindex = -math.MaxFloat64 - } - resources, proposedPreemptions := qjm.getAggregatedAvailableResourcesPriority( - qjm.cache.GetUnallocatedResources(), priorityindex, qj, "") - klog.Infof("[ScheduleNext] [Agent Mode] Appwrapper '%s/%s' with resources %v to be scheduled on aggregated idle resources %v", qj.Namespace, qj.Name, aggqj, resources) - - // Assume preemption will remove low priroity AWs in the system, optimistically dispatch such AWs - - if aggqj.LessEqual(resources) { - unallocatedHistogramMap := qjm.cache.GetUnallocatedHistograms() - if !qjm.nodeChecks(unallocatedHistogramMap, qj) { - klog.Infof("[ScheduleNext] [Agent Mode] Optimistic dispatch for AW '%s/%s' requesting aggregated resources %v histogram for point in-time fragmented resources are available in the cluster %s", - qj.Name, qj.Namespace, qjm.GetAggregatedResources(qj), proto.MarshalTextString(unallocatedHistogramMap["gpu"])) - } - // Now evaluate quota - fits := true - klog.Infof("[ScheduleNext] [Agent Mode] available resourse successful check for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v.", - qj.Name, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - if qjm.serverOption.QuotaEnabled { - if qjm.quotaManager != nil { - // Quota tree design: - // - All AppWrappers without quota submission will consume quota from the 'default' node. 
- // - All quota trees in the system should have a 'default' node so AppWrappers without - // quota specification can be dispatched - // - If the AppWrapper doesn't have a quota label, then one is added for every tree with the 'default' value - // - Depending on how the 'default' node is configured, AppWrappers that don't specify quota could be - // preemptable by default (e.g., 'default' node with 'cpu: 0m' and 'memory: 0Mi' quota and 'hardLimit: false' - // such node borrows quota from other nodes already in the system) - allTrees := qjm.quotaManager.GetValidQuotaLabels() - newLabels := make(map[string]string) - for key, value := range qj.Labels { - newLabels[key] = value - } - updateLabels := false - for _, treeName := range allTrees { - if _, quotaSetForAW := newLabels[treeName]; !quotaSetForAW { - newLabels[treeName] = "default" - updateLabels = true - } + // Now evaluate quota + fits := true + klog.V(4).Infof("[ScheduleNext] HOL available resourse successful check for %s at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + if qjm.serverOption.QuotaEnabled { + if qjm.quotaManager != nil { + // Quota tree design: + // - All AppWrappers without quota submission will consume quota from the 'default' node. + // - All quota trees in the system should have a 'default' node so AppWrappers without + // quota specification can be dispatched + // - If the AppWrapper doesn't have a quota label, then one is added for every tree with the 'default' value + // - Depending on how the 'default' node is configured, AppWrappers that don't specify quota could be + // preemptable by default (e.g., 'default' node with 'cpu: 0m' and 'memory: 0Mi' quota and 'hardLimit: false' + // such node borrows quota from other nodes already in the system) + apiCacheAWJob, err := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) + if err != nil { + klog.Errorf("[ScheduleNext] Failed to get AppWrapper from API Cache %v/%v: %v", + qj.Namespace, qj.Name, err) + continue + } + allTrees := qjm.quotaManager.GetValidQuotaLabels() + newLabels := make(map[string]string) + for key, value := range apiCacheAWJob.Labels { + newLabels[key] = value + } + updateLabels := false + for _, treeName := range allTrees { + if _, quotaSetForAW := newLabels[treeName]; !quotaSetForAW { + newLabels[treeName] = "default" + updateLabels = true } - if updateLabels { - tempAW, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Agent Mode] update labels") - if retryErr != nil { - if apierrors.IsNotFound(retryErr) { - klog.Warningf("[ScheduleNext] [Agent Mode] app wrapper '%s/%s' not found while trying to update labels, skiping dispatch.", qj.Namespace, qj.Name) - return nil - } - return retryErr - } - tempAW.SetLabels(newLabels) - updatedAW, retryErr := qjm.updateEtcd(tempAW, "ScheduleNext [Agent Mode] - setDefaultQuota") - if retryErr != nil { - if apierrors.IsConflict(err) { - klog.Warningf("[ScheduleNext] [Agent mode] Conflict error detected when updating labels in etcd for app wrapper '%s/%s, status = %+v. 
Retrying update.", qj.Namespace, qj.Name, qj.Status) - } else { - klog.Errorf("[ScheduleNext] [Agent mode] Failed to update labels in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, err) - } - return retryErr - } - klog.Infof("[ScheduleNext] [Agent Mode] Default quota added to AW '%s/%s'", qj.Namespace, qj.Name) - updatedAW.DeepCopyInto(qj) + } + if updateLabels { + apiCacheAWJob.SetLabels(newLabels) + if err := qjm.updateEtcd(apiCacheAWJob, "ScheduleNext - setDefaultQuota"); err == nil { + klog.V(3).Infof("[ScheduleNext] Default quota added to AW %v", qj.Name) + } else { + klog.V(3).Infof("[ScheduleNext] Failed to added default quota to AW %v, skipping dispatch of AW", qj.Name) + return } - var msg string - var preemptAWs []*arbv1.AppWrapper - quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(qj, aggqj, proposedPreemptions) - if quotaFits { - klog.Infof("[ScheduleNext] [Agent mode] quota evaluation successful for app wrapper '%s/%s' activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - // Set any jobs that are marked for preemption - qjm.preemptAWJobs(preemptAWs) - } else { // Not enough free quota to dispatch appwrapper - dispatchFailedMessage = "Insufficient quota to dispatch AppWrapper." - if len(msg) > 0 { - dispatchFailedReason += " " - dispatchFailedReason += msg - } - klog.Infof("[ScheduleNext] [Agent Mode] Blocking dispatch for app wrapper '%s/%s' due to quota limits, activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s", - qj.Namespace, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, msg) + } + var msg string + var preemptAWs []*arbv1.AppWrapper + quotaFits, preemptAWs, msg = qjm.quotaManager.Fits(qj, aggqj, proposedPreemptions) + if quotaFits { + klog.Infof("[ScheduleNext] HOL quota evaluation successful %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v due to quota limits", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + // Set any jobs that are marked for preemption + qjm.preemptAWJobs(preemptAWs) + } else { // Not enough free quota to dispatch appwrapper + dispatchFailedMessage = "Insufficient quota to dispatch AppWrapper." + if len(msg) > 0 { + dispatchFailedReason += " " + dispatchFailedReason += msg } - fits = quotaFits - } else { - fits = false - //Quota manager not initialized - dispatchFailedMessage = "Quota evaluation is enabled but not initialized. Insufficient quota to dispatch AppWrapper." - klog.Errorf("[ScheduleNext] [Agent Mode] Quota evaluation is enabled but not initialized. 
AppWrapper '%s/%s' does not have enough quota", qj.Namespace, qj.Name) + klog.V(3).Infof("[ScheduleNext] HOL Blocking by %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v msg=%s, due to quota limits", + qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, msg) } + fits = quotaFits } else { - klog.V(4).Infof("[ScheduleNext] [Agent Mode] quota evaluation not enabled for '%s/%s' at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, - qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + fits = false + //Quota manager not initialized + dispatchFailedMessage = "Quota evaluation is enabled but not initialized. Insufficient quota to dispatch AppWrapper." + klog.Errorf("[ScheduleNext] Quota evaluation is enabled but not initialized. AppWrapper %s/%s does not have enough quota\n", qj.Name, qj.Namespace) } - // If quota evalauation sucedeed or quota evaluation not enabled set the appwrapper to be dispatched - if fits { - - // aw is ready to go! - tempAW, retryErr := qjm.getAppWrapper(qj.Namespace, qj.Name, "[ScheduleNext] [Agent Mode] -- ready to dispatch") - if retryErr != nil { - if apierrors.IsNotFound(retryErr) { - return nil - } - klog.Errorf("[ScheduleNext] [Agent Mode] Failed to get fresh copy of the app wrapper '%s/%s' to update status, err = %v", qj.Namespace, qj.Name, err) - return retryErr - } - desired := int32(0) - for i, ar := range tempAW.Spec.AggrResources.Items { - desired += ar.Replicas - tempAW.Spec.AggrResources.Items[i].AllocatedReplicas = ar.Replicas - } + } else { + klog.V(4).Infof("[ScheduleNext] HOL quota evaluation not enabled for %s at %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + } - tempAW.Status.CanRun = true - tempAW.Status.FilterIgnore = true // update CanRun & Spec. no need to trigger event - retryErr = qjm.updateStatusInEtcd(tempAW, "ScheduleNext - setCanRun") - if retryErr != nil { - if qjm.quotaManager != nil && quotaFits { - //Quota was allocated for this appwrapper, release it. - qjm.quotaManager.Release(qj) - } - if apierrors.IsNotFound(retryErr) { - klog.Warningf("[ScheduleNext] [Agent Mode] app wrapper '%s/%s' not found after status update, skiping dispatch.", qj.Namespace, qj.Name) - return nil - } else if apierrors.IsConflict(retryErr) { - klog.Warningf("[ScheduleNext] [Agent mode] Conflict error detected when updating status in etcd for app wrapper '%s/%s, status = %+v. Retrying update.", qj.Namespace, qj.Name, qj.Status) - } else if retryErr != nil { - klog.Errorf("[ScheduleNext] [Agent mode] Failed to update status in etcd for app wrapper '%s/%s', status = %+v, err=%v", qj.Namespace, qj.Name, qj.Status, err) - } - return retryErr + // If quota evalauation sucedeed or quota evaluation not enabled set the appwrapper to be dispatched + if fits { + // aw is ready to go! 
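As background for the quota branch above: before calling Fits, the agent-mode path makes sure the AppWrapper carries a label for every valid quota tree, filling in the 'default' node for any tree the wrapper did not name and writing the labels back only when something changed. A small sketch of that fill-in rule (the tree names and label map below are illustrative; only the fallback behaviour comes from the hunk):

package main

import "fmt"

// ensureDefaultQuotaLabels returns a copy of the AppWrapper's labels with a
// "default" entry added for every quota tree the wrapper did not name, plus a
// flag telling the caller whether an update (and etcd write) is needed.
func ensureDefaultQuotaLabels(awLabels map[string]string, allTrees []string) (map[string]string, bool) {
	newLabels := make(map[string]string, len(awLabels))
	for k, v := range awLabels {
		newLabels[k] = v
	}
	updated := false
	for _, tree := range allTrees {
		if _, ok := newLabels[tree]; !ok {
			newLabels[tree] = "default"
			updated = true
		}
	}
	return newLabels, updated
}

func main() {
	// Hypothetical quota trees and labels, for illustration only.
	trees := []string{"quota_context", "quota_service"}
	labels := map[string]string{"quota_context": "gold"}
	out, changed := ensureDefaultQuotaLabels(labels, trees)
	fmt.Println(changed, out) // true map[quota_context:gold quota_service:default]
}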
+ apiQueueJob, e := qjm.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) + // apiQueueJob's ControllerFirstTimestamp is only microsecond level instead of nanosecond level + if e != nil { + klog.Errorf("[ScheduleNext] Unable to get AW %s from API cache &aw=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) + if qjm.quotaManager != nil && quotaFits { + //Quota was allocated for this appwrapper, release it. + qjm.quotaManager.Release(qj) } - tempAW.DeepCopyInto(qj) + return + } + // make sure qj has the latest information + if larger(apiQueueJob.ResourceVersion, qj.ResourceVersion) { + klog.V(4).Infof("[ScheduleNext] %s found more recent copy from cache &qj=%p qj=%+v", qj.Name, qj, qj) + klog.V(4).Infof("[ScheduleNext] %s found more recent copy from cache &apiQueueJob=%p apiQueueJob=%+v", apiQueueJob.Name, apiQueueJob, apiQueueJob) + apiQueueJob.DeepCopyInto(qj) + } + desired := int32(0) + for i, ar := range qj.Spec.AggrResources.Items { + desired += ar.Replicas + qj.Spec.AggrResources.Items[i].AllocatedReplicas = ar.Replicas + } + qj.Status.CanRun = true + qj.Status.FilterIgnore = true // update CanRun & Spec. no need to trigger event + // Handle k8s watch race condition + if err := qjm.updateEtcd(qj, "ScheduleNext - setCanRun"); err == nil { // add to eventQueue for dispatching to Etcd - if err00 := qjm.eventQueue.Add(qj); err00 != nil { // unsuccessful add to eventQueue, add back to activeQ - klog.Errorf("[ScheduleNext] [Agent Mode] Failed to add '%s/%s' to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Namespace, - qj.Name, qj, qj.ResourceVersion, qj.Status, err) + if err = qjm.eventQueue.Add(qj); err != nil { // unsuccessful add to eventQueue, add back to activeQ + klog.Errorf("[ScheduleNext] Fail to add %s to eventQueue, activeQ.Add_toSchedulingQueue &qj=%p Version=%s Status=%+v err=%#v", qj.Name, qj, qj.ResourceVersion, qj.Status, err) qjm.qjqueue.MoveToActiveQueueIfExists(qj) } else { // successful add to eventQueue, remove from qjqueue qjm.qjqueue.Delete(qj) forwarded = true - klog.Infof("[ScheduleNext] [Agent Mode] Successfully dispatched app wrapper '%s/%s' activeQ=%t, Unsched=%t &aw=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(4).Infof("[ScheduleNext] %s Delay=%.6f seconds eventQueue.Add_afterHeadOfLine activeQ=%t, Unsched=%t &aw=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } - } //fits - } else { // Not enough free resources to dispatch HOL - dispatchFailedMessage = "Insufficient resources to dispatch AppWrapper." 
- klog.Infof("[ScheduleNext] [Agent Mode] Failed to dispatch app wrapper '%s/%s' due to insuficient resources, activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, qjm.qjqueue.IfExistActiveQ(qj), - qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - } - // if the HeadOfLineHoldingTime option is not set it will break the loop - schedulingTimeExpired := time.Now().After(HOLStartTime.Add(time.Duration(qjm.serverOption.HeadOfLineHoldingTime) * time.Second)) - if forwarded { - break - } else if schedulingTimeExpired { - // stop trying to dispatch after HeadOfLineHoldingTime - // release quota if allocated - if qjm.quotaManager != nil && quotaFits { - //Quota was allocated for this appwrapper, release it. - qjm.quotaManager.Release(qj) - } - break - } else { // Try to dispatch again after one second - if qjm.quotaManager != nil && quotaFits { - //release any quota as the qj will be tried again and the quota might have been allocated. - qjm.quotaManager.Release(qj) - } - time.Sleep(time.Second * 1) + } //updateEtcd + } //fits + } else { // Not enough free resources to dispatch HOL + dispatchFailedMessage = "Insufficient resources to dispatch AppWrapper." + klog.V(4).Infof("[ScheduleNext] HOL Blocking by %s for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + } + schedulingTimeExpired := time.Now().After(HOLStartTime.Add(time.Duration(qjm.serverOption.HeadOfLineHoldingTime) * time.Second)) + if forwarded { + break + } else if schedulingTimeExpired { + // stop trying to dispatch after HeadOfLineHoldingTime + // release quota if allocated + if qjm.quotaManager != nil && quotaFits { + //Quota was allocated for this appwrapper, release it. + qjm.quotaManager.Release(qj) } - fowardingLoopCount += 1 - } - if !forwarded { // start thread to backoff - klog.Infof("[ScheduleNext] [Agent Mode] backing off app wrapper '%s/%s' after waiting for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + break + } else { // Try to dispatch again after one second if qjm.quotaManager != nil && quotaFits { + //release any quota as the qj will be tried again and the quota might have been allocated. 
qjm.quotaManager.Release(qj) } - go qjm.backoff(qj, dispatchFailedReason, dispatchFailedMessage) + time.Sleep(time.Second * 1) } + fowardingLoopCount += 1 + } + if !forwarded { // start thread to backoff + klog.V(3).Infof("[ScheduleNext] HOL backoff %s after waiting for %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(HOLStartTime), qjm.qjqueue.IfExistActiveQ(qj), qjm.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + if qjm.quotaManager != nil && quotaFits { + qjm.quotaManager.Release(qj) + } + go qjm.backoff(qj, dispatchFailedReason, dispatchFailedMessage) } - return nil - }) - if apierrors.IsNotFound(err) { - klog.Warningf("[ScheduleNext] app wrapper '%s/%s' not found skiping dispatch", qj.Namespace, qj.Name) - return - } - if err != nil { - klog.Warningf("[ScheduleNext] failed to dispatch the app wrapper '%s/%s', err= %v", qj.Namespace, qj.Name, err) - klog.Warningf("[ScheduleNext] retrying dispatch") - qjm.qjqueue.AddIfNotPresent(qj) } } // Update AppWrappers in etcd // todo: This is a current workaround for duplicate message bug. -func (cc *XController) updateEtcd(currentAppwrapper *arbv1.AppWrapper, caller string) (*arbv1.AppWrapper, error) { - klog.V(4).Infof("[updateEtcd] trying to update '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) - currentAppwrapper.Status.Sender = "before " + caller // set Sender string to indicate code location - currentAppwrapper.Status.Local = false // for Informer FilterFunc to pickup - updatedAppwrapper, err := cc.arbclients.ArbV1().AppWrappers(currentAppwrapper.Namespace).Update(currentAppwrapper) - if err != nil { - return nil, err - } - if larger(currentAppwrapper.ResourceVersion, updatedAppwrapper.ResourceVersion) { - klog.Warningf("[updateEtcd] current app wrapper '%s/%s' called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, currentAppwrapper.ResourceVersion) - klog.Warningf("[updateEtcd] updated app wrapper '%s/%s' called by '%s' has version %s", updatedAppwrapper.Namespace, updatedAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) +func (cc *XController) updateEtcd(qj *arbv1.AppWrapper, at string) error { + //apiCacheAWJob, e := cc.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) + // + //if (e != nil) { + // klog.Errorf("[updateEtcd] Failed to update status of AppWrapper %s, namespace: %s at %s err=%v", + // apiCacheAWJob.Name, apiCacheAWJob.Namespace, at, e) + // return e + //} + + //TODO: Remove next line + var apiCacheAWJob *arbv1.AppWrapper + //TODO: Remove next line + apiCacheAWJob = qj + apiCacheAWJob.Status.Sender = "before " + at // set Sender string to indicate code location + apiCacheAWJob.Status.Local = false // for Informer FilterFunc to pickup + if _, err := cc.arbclients.ArbV1().AppWrappers(apiCacheAWJob.Namespace).Update(apiCacheAWJob); err != nil { + klog.Errorf("[updateEtcd] Failed to update status of AppWrapper %s, namespace: %s at %s err=%v", + apiCacheAWJob.Name, apiCacheAWJob.Namespace, at, err) + return err + // } else { // qjj should be the same as qj except with newer ResourceVersion + // qj.ResourceVersion = qjj.ResourceVersion // update new ResourceVersion from etcd } - - klog.V(4).Infof("[updateEtcd] update success '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) - return updatedAppwrapper.DeepCopy(), nil + klog.V(10).Infof("[updateEtcd] AppWrapperUpdate success %s at %s &qj=%p qj=%+v", + apiCacheAWJob.Name, at, apiCacheAWJob, apiCacheAWJob) 
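(Aside: the update helper above stamps Status.Sender with the calling site and clears Status.Local before writing to the API server, so the informer's FilterFunc can tell round-tripped, self-generated updates from external ones. A minimal sketch of that stamping pattern, using simplified stand-in types rather than the cc.arbclients client API:)

package main

import "fmt"

type AppWrapperStatus struct {
	Sender string // which code path wrote this update (debugging / informer filtering)
	Local  bool   // true would mark a locally generated duplicate for the informer to ignore
}

type AppWrapper struct {
	Name   string
	Status AppWrapperStatus
}

// updater abstracts the API write; the real controller goes through
// cc.arbclients.ArbV1().AppWrappers(ns).Update(...).
type updater func(*AppWrapper) error

// updateWithSenderTag stamps the status the same way updateEtcd does above
// before handing the object to the API client.
func updateWithSenderTag(up updater, aw *AppWrapper, caller string) error {
	aw.Status.Sender = "before " + caller
	aw.Status.Local = false // let the informer pick this update up
	if err := up(aw); err != nil {
		return fmt.Errorf("update of %s at %s failed: %w", aw.Name, caller, err)
	}
	return nil
}

func main() {
	aw := &AppWrapper{Name: "aw-1"}
	_ = updateWithSenderTag(func(*AppWrapper) error { return nil }, aw, "ScheduleNext - setCanRun")
	fmt.Printf("%+v\n", aw.Status)
}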
+ //qj.Status.Local = true // for Informer FilterFunc to ignore duplicate + //qj.Status.Sender = "after "+ at // set Sender string to indicate code location + return nil } -func (cc *XController) updateStatusInEtcd(currentAppwrapper *arbv1.AppWrapper, caller string) error { - klog.V(4).Infof("[updateStatusInEtcd] trying to update '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) - currentAppwrapper.Status.Sender = "before " + caller // set Sender string to indicate code location - updatedAppwrapper, err := cc.arbclients.ArbV1().AppWrappers(currentAppwrapper.Namespace).UpdateStatus(currentAppwrapper) - if err != nil { +func (cc *XController) updateStatusInEtcd(qj *arbv1.AppWrapper, at string) error { + var apiCacheAWJob *arbv1.AppWrapper + apiCacheAWJob = qj + if _, err := cc.arbclients.ArbV1().AppWrappers(apiCacheAWJob.Namespace).UpdateStatus(apiCacheAWJob); err != nil { + klog.Errorf("[updateEtcd] Failed to update status of AppWrapper %s, namespace: %s at %s err=%v", + apiCacheAWJob.Name, apiCacheAWJob.Namespace, at, err) return err } - if larger(currentAppwrapper.ResourceVersion, updatedAppwrapper.ResourceVersion) { - klog.Warningf("[updateStatusInEtcd] current app wrapper '%s/%s' called by '%s' has version %s", currentAppwrapper.Namespace, currentAppwrapper.Name, caller, currentAppwrapper.ResourceVersion) - klog.Warningf("[updateStatusInEtcd] updated app wrapper '%s/%s' called by '%s' has version %s", updatedAppwrapper.Namespace, updatedAppwrapper.Name, caller, updatedAppwrapper.ResourceVersion) - } - updatedAppwrapper.DeepCopyInto(currentAppwrapper) - klog.V(4).Infof("[updateStatusInEtcd] update success '%s/%s' called by '%s'", currentAppwrapper.Namespace, currentAppwrapper.Name, caller) + klog.V(10).Infof("[updateEtcd] AppWrapperUpdate success %s at %s &qj=%p qj=%+v", + apiCacheAWJob.Name, at, apiCacheAWJob, apiCacheAWJob) return nil } -func (cc *XController) updateStatusInEtcdWithRetry(source *arbv1.AppWrapper, caller string) error { - klog.V(4).Infof("[updateStatusInEtcdWithMergeFunction] trying to update '%s/%s' version '%s' called by '%s'", source.Namespace, source.Name, source.ResourceVersion, caller) - source.Status.Sender = "before " + caller // set Sender string to indicate code location - updateStatusRetrierRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) - updateStatusRetrierRetrier.SetJitter(0.05) - updatedAW := source.DeepCopy() - err := updateStatusRetrierRetrier.Run(func() error { - var retryErr error - updatedAW, retryErr = cc.arbclients.ArbV1().AppWrappers(updatedAW.Namespace).UpdateStatus(updatedAW) - if retryErr != nil && apierrors.IsConflict(retryErr) { - dest, retryErr := cc.getAppWrapper(source.Namespace, source.Name, caller) - if retryErr != nil && !apierrors.IsNotFound(retryErr) { - klog.Warningf("[updateStatusInEtcdWithMergeFunction] retrying the to update '%s/%s' version '%s' called by '%s'", source.Namespace, source.Name, source.ResourceVersion, caller) - source.Status.DeepCopyInto(&dest.Status) - dest.Status.Sender = "before " + caller // set Sender string to indicate code location - dest.DeepCopyInto(updatedAW) - } - return retryErr - } - if retryErr == nil { - updatedAW.DeepCopyInto(source) - } - return retryErr - }) - if err != nil { - klog.V(4).Infof("[updateStatusInEtcdWithMergeFunction] update failure '%s/%s' called by '%s'", source.Namespace, source.Name, caller) - return err + +func (qjm *XController) waitForPodCountUpdates(searchCond *arbv1.AppWrapperCondition) 
bool { + + // Continue reserving resources if dispatched condition not found + if searchCond == nil { + klog.V(10).Infof("[waitForPodCountUpdates] No condition found.") + return true + } + + // Current time + now := metav1.NowMicro() + nowPtr := &now + + // Last time AW was dispatched + dispactedTS := searchCond.LastUpdateMicroTime + dispactedTSPtr := &dispactedTS + + // Error checking + if nowPtr.Before(dispactedTSPtr) { + klog.Errorf("[waitForPodCountUpdates] Current timestamp: %s is before condition latest update timestamp: %s", + now.String(), dispactedTS.String()) + return true + } + + // Duration since last time AW was dispatched + timeSinceDispatched := now.Sub(dispactedTS.Time) + + // Convert timeout default from milliseconds to microseconds + timeoutMicroSeconds := qjm.serverOption.DispatchResourceReservationTimeout * 1000 + + // Don't reserve resources if timeout is hit + if timeSinceDispatched.Microseconds() > timeoutMicroSeconds { + klog.V(10).Infof("[waitForPodCountUpdates] Dispatch duration time %d microseconds has reached timeout value of %d microseconds", + timeSinceDispatched.Microseconds(), timeoutMicroSeconds) + + return false + } + + klog.V(10).Infof("[waitForPodCountUpdates] Dispatch duration time %d microseconds has not reached timeout value of %d microseconds", + timeSinceDispatched.Microseconds(), timeoutMicroSeconds) + return true +} + +func (qjm *XController) getLatestStatusConditionType(aw *arbv1.AppWrapper, condType arbv1.AppWrapperConditionType) *arbv1.AppWrapperCondition { + var latestConditionBasedOnType arbv1.AppWrapperCondition + if aw.Status.Conditions != nil && len(aw.Status.Conditions) > 0 { + // Find the latest matching condition based on type only, not on other condition fields + for _, condition := range aw.Status.Conditions { + // Matching condition? + if condition.Type == condType { + //First time match? + if (arbv1.AppWrapperCondition{} == latestConditionBasedOnType) { + latestConditionBasedOnType = condition + } else { + // Compare current condition to last match and keep the later condition + currentCondLastUpdate := condition.LastUpdateMicroTime + currentCondLastUpdatePtr := &currentCondLastUpdate + lastCondLastUpdate := latestConditionBasedOnType.LastUpdateMicroTime + lastCondLastUpdatePtr := &lastCondLastUpdate + if lastCondLastUpdatePtr.Before(currentCondLastUpdatePtr) { + latestConditionBasedOnType = condition + } + } + } // Condition type match check + } // Loop through conditions of AW + } // AW has conditions?
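(Aside: the selection loop above keeps only the condition of the requested type with the newest LastUpdateMicroTime. A compact sketch of the same selection, with a simplified condition type and plain time.Time in place of metav1.MicroTime:)

package main

import (
	"fmt"
	"time"
)

type ConditionType string

type Condition struct {
	Type           ConditionType
	LastUpdateTime time.Time
}

// latestConditionOfType returns the most recently updated condition of the
// given type, or nil when none matches.
func latestConditionOfType(conds []Condition, t ConditionType) *Condition {
	var latest *Condition
	for i := range conds {
		if conds[i].Type != t {
			continue
		}
		if latest == nil || latest.LastUpdateTime.Before(conds[i].LastUpdateTime) {
			latest = &conds[i]
		}
	}
	return latest
}

func main() {
	now := time.Now()
	conds := []Condition{
		{Type: "Dispatched", LastUpdateTime: now.Add(-2 * time.Minute)},
		{Type: "Dispatched", LastUpdateTime: now.Add(-1 * time.Minute)},
		{Type: "Queueing", LastUpdateTime: now},
	}
	fmt.Println(latestConditionOfType(conds, "Dispatched").LastUpdateTime)
}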
+ + // If no matching condition found return nil otherwise return matching latest condition + if (arbv1.AppWrapperCondition{} == latestConditionBasedOnType) { + klog.V(10).Infof("[getLatestStatusConditionType] No disptach condition found for AppWrapper=%s/%s.", + aw.Name, aw.Namespace) + return nil + } else { + return &latestConditionBasedOnType } - klog.V(4).Infof("[updateStatusInEtcdWithMergeFunction] update success '%s/%s' version '%s' called by '%s'", source.Namespace, source.Name, source.ResourceVersion, caller) - return nil } func (qjm *XController) addOrUpdateCondition(aw *arbv1.AppWrapper, condType arbv1.AppWrapperConditionType, @@ -1570,37 +1533,29 @@ func (qjm *XController) addOrUpdateCondition(aw *arbv1.AppWrapper, condType arbv } func (qjm *XController) backoff(q *arbv1.AppWrapper, reason string, message string) { - - etcUpdateRetrier := retrier.New(retrier.ExponentialBackoff(10, 100*time.Millisecond), &EtcdErrorClassifier{}) - err := etcUpdateRetrier.Run(func() error { - apiCacheAWJob, retryErr := qjm.getAppWrapper(q.Namespace, q.Name, "[backoff] - Rejoining") - if retryErr != nil { - return retryErr - } - q.Status.DeepCopyInto(&apiCacheAWJob.Status) + var workingAW *arbv1.AppWrapper + apiCacheAWJob, e := qjm.queueJobLister.AppWrappers(q.Namespace).Get(q.Name) + // Update condition + if e == nil { + workingAW = apiCacheAWJob apiCacheAWJob.Status.QueueJobState = arbv1.AppWrapperCondBackoff - apiCacheAWJob.Status.FilterIgnore = true // update QueueJobState only, no work needed - // Update condition - qjm.addOrUpdateCondition(apiCacheAWJob, arbv1.AppWrapperCondBackoff, v1.ConditionTrue, reason, message) - if retryErr := qjm.updateStatusInEtcd(apiCacheAWJob, "[backoff] - Rejoining"); retryErr != nil { - if apierrors.IsConflict(retryErr) { - klog.Warningf("[backoff] Conflict when upating AW status in etcd '%s/%s'. Retrying.", apiCacheAWJob.Namespace, apiCacheAWJob.Name) - } - return retryErr - } - return nil - }) - if err != nil { - klog.Errorf("[backoff] Failed to update status for %s/%s. Continuing with possible stale object without updating conditions. err=%s", q.Namespace, q.Name, err) + workingAW.Status.FilterIgnore = true // update QueueJobState only, no work needed + qjm.addOrUpdateCondition(workingAW, arbv1.AppWrapperCondBackoff, v1.ConditionTrue, reason, message) + //qjm.updateEtcd(workingAW, "backoff - Rejoining") + qjm.updateStatusInEtcd(workingAW, "backoff - Rejoining") + } else { + workingAW = q + klog.Errorf("[backoff] Failed to retrieve cached object for %s/%s. Continuing with possible stale object without updating conditions.", workingAW.Namespace, workingAW.Name) + } - qjm.qjqueue.AddUnschedulableIfNotPresent(q) - klog.V(3).Infof("[backoff] %s move to unschedulableQ before sleep for %d seconds. activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", q.Name, - qjm.serverOption.BackoffTime, qjm.qjqueue.IfExistActiveQ(q), qjm.qjqueue.IfExistUnschedulableQ(q), q, q.ResourceVersion, q.Status) + qjm.qjqueue.AddUnschedulableIfNotPresent(workingAW) + klog.V(3).Infof("[backoff] %s move to unschedulableQ before sleep for %d seconds. 
activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", workingAW.Name, + qjm.serverOption.BackoffTime, qjm.qjqueue.IfExistActiveQ((workingAW)), qjm.qjqueue.IfExistUnschedulableQ((workingAW)), workingAW, workingAW.ResourceVersion, workingAW.Status) time.Sleep(time.Duration(qjm.serverOption.BackoffTime) * time.Second) - qjm.qjqueue.MoveToActiveQueueIfExists(q) + qjm.qjqueue.MoveToActiveQueueIfExists(workingAW) - klog.V(3).Infof("[backoff] '%s/%s' activeQ Add after sleep for %d seconds. activeQ=%t Unsched=%t &aw=%p Version=%s Status=%+v", q.Namespace, q.Name, - qjm.serverOption.BackoffTime, qjm.qjqueue.IfExistActiveQ(q), qjm.qjqueue.IfExistUnschedulableQ(q), q, q.ResourceVersion, q.Status) + klog.V(3).Infof("[backoff] %s activeQ.Add after sleep for %d seconds. activeQ=%t Unsched=%t &aw=%p Version=%s Status=%+v", workingAW.Name, + qjm.serverOption.BackoffTime, qjm.qjqueue.IfExistActiveQ((workingAW)), qjm.qjqueue.IfExistUnschedulableQ((workingAW)), workingAW, workingAW.ResourceVersion, workingAW.Status) } // Run start AppWrapper Controller @@ -1645,6 +1600,7 @@ func (cc *XController) Run(stopCh chan struct{}) { go wait.Until(cc.agentEventQueueWorker, time.Second, stopCh) // Update Agent Worker } + // go wait.Until(cc.worker, time.Second, stopCh) go wait.Until(cc.worker, 0, stopCh) } @@ -1660,7 +1616,7 @@ func (qjm *XController) UpdateQueueJobs() { // retrieve queueJobs from local cache. no guarantee queueJobs contain up-to-date information queueJobs, err := qjm.queueJobLister.AppWrappers("").List(labels.Everything()) if err != nil { - klog.Errorf("[UpdateQueueJobs] Failed to get a list of active appwrappers, err=%+v", err) + klog.Errorf("[UpdateQueueJobs] List of queueJobs err=%+v", err) return } for _, newjob := range queueJobs { @@ -1677,22 +1633,20 @@ func (qjm *XController) UpdateQueueJobs() { LastTransitionMicroTime: metav1.NowMicro(), }, } - klog.V(6).Infof("[UpdateQueueJobs] Found new appwraper '%s/%s' 0Delay=%.6f seconds CreationTimestamp=%s ControllerFirstTimestamp=%s", - newjob.Namespace, newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob.CreationTimestamp, newjob.Status.ControllerFirstTimestamp) + klog.V(3).Infof("[UpdateQueueJobs] %s 0Delay=%.6f seconds CreationTimestamp=%s ControllerFirstTimestamp=%s", + newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob.CreationTimestamp, newjob.Status.ControllerFirstTimestamp) } //only set if appwrapper is running and dispatch time is not set previously if newjob.Status.QueueJobState == "Running" && newjob.Status.ControllerFirstDispatchTimestamp.String() == "0001-01-01 00:00:00 +0000 UTC" { newjob.Status.ControllerFirstDispatchTimestamp = firstTime } - klog.V(6).Infof("[UpdateQueueJobs] %s: qjqueue=%t &qj=%p Version=%s Status=%+v", newjob.Name, qjm.qjqueue.IfExist(newjob), newjob, newjob.ResourceVersion, newjob.Status) + klog.V(10).Infof("[UpdateQueueJobs] %s: qjqueue=%t &qj=%p Version=%s Status=%+v", newjob.Name, qjm.qjqueue.IfExist(newjob), newjob, newjob.ResourceVersion, newjob.Status) // check eventQueue, qjqueue in program sequence to make sure job is not in qjqueue if _, exists, _ := qjm.eventQueue.Get(newjob); exists { - klog.V(6).Infof("[UpdateQueueJobs] app wrapper %s/%s found in the event queue, not adding it", newjob.Namespace, newjob.Name) continue } // do not enqueue if already in eventQueue if qjm.qjqueue.IfExist(newjob) { - klog.V(6).Infof("[UpdateQueueJobs] app wrapper %s/%s found in the job queue, not adding it", newjob.Namespace, newjob.Name) continue 
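(Aside: UpdateQueueJobs above skips an AppWrapper that is already sitting in either the event queue or the scheduling queue (qjqueue) before enqueueing it again. A small sketch of that two-stage duplicate check, with an illustrative queue interface rather than the controller's real queue types:)

package main

import "fmt"

// queue stands in for both the controller's eventQueue and qjqueue.
type queue interface {
	Contains(key string) bool
}

// shouldEnqueue reproduces the duplicate check: skip the AppWrapper if it is
// already waiting in either queue.
func shouldEnqueue(eventQ, schedQ queue, key string) bool {
	if eventQ.Contains(key) {
		return false // already queued for the worker
	}
	if schedQ.Contains(key) {
		return false // already queued for ScheduleNext
	}
	return true
}

type set map[string]bool

func (s set) Contains(k string) bool { return s[k] }

func main() {
	eventQ, schedQ := set{"ns/aw-1": true}, set{}
	fmt.Println(shouldEnqueue(eventQ, schedQ, "ns/aw-1")) // false: duplicate, skip
	fmt.Println(shouldEnqueue(eventQ, schedQ, "ns/aw-2")) // true: safe to enqueue
}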
} // do not enqueue if already in qjqueue @@ -1700,7 +1654,7 @@ func (qjm *XController) UpdateQueueJobs() { if err != nil { klog.Errorf("[UpdateQueueJobs] Fail to enqueue %s to eventQueue, ignore. *Delay=%.6f seconds &qj=%p Version=%s Status=%+v err=%#v", newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob, newjob.ResourceVersion, newjob.Status, err) } else { - klog.V(6).Infof("[UpdateQueueJobs] %s *Delay=%.6f seconds eventQueue.Add_byUpdateQueueJobs &qj=%p Version=%s Status=%+v", newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob, newjob.ResourceVersion, newjob.Status) + klog.V(4).Infof("[UpdateQueueJobs] %s *Delay=%.6f seconds eventQueue.Add_byUpdateQueueJobs &qj=%p Version=%s Status=%+v", newjob.Name, time.Now().Sub(newjob.Status.ControllerFirstTimestamp.Time).Seconds(), newjob, newjob.ResourceVersion, newjob.Status) } } } @@ -1730,10 +1684,10 @@ func (cc *XController) addQueueJob(obj interface{}) { qj.Name, qj.Status.State, qj.Status.ControllerFirstTimestamp, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds()) } - klog.V(6).Infof("[Informer-addQJ] %s Delay=%.6f seconds CreationTimestamp=%s ControllerFirstTimestamp=%s", + klog.V(10).Infof("[Informer-addQJ] %s Delay=%.6f seconds CreationTimestamp=%s ControllerFirstTimestamp=%s", qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.CreationTimestamp, qj.Status.ControllerFirstTimestamp) - klog.V(6).Infof("[Informer-addQJ] enqueue %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) + klog.V(4).Infof("[Informer-addQJ] enqueue %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) cc.enqueue(qj) } @@ -1746,20 +1700,22 @@ func (cc *XController) updateQueueJob(oldObj, newObj interface{}) { oldQJ, ok := oldObj.(*arbv1.AppWrapper) if !ok { klog.Errorf("[Informer-updateQJ] old object is not AppWrapper. enqueue(newQJ). oldObj=%+v", oldObj) + klog.V(4).Infof("[Informer-updateQJ] %s *Delay=%.6f seconds BadOldObject enqueue &newQJ=%p Version=%s Status=%+v", newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ, newQJ.ResourceVersion, newQJ.Status) + //cc.enqueue(newQJ) return } // AppWrappers may come out of order. Ignore old ones. 
- if (oldQJ.Namespace == newQJ.Namespace) && (oldQJ.Name == newQJ.Name) && (larger(oldQJ.ResourceVersion, newQJ.ResourceVersion)) { - klog.V(6).Infof("[Informer-updateQJ] '%s/%s' ignored OutOfOrder arrival &oldQJ=%p oldQJ=%+v", oldQJ.Namespace, oldQJ.Name, oldQJ, oldQJ) - klog.V(6).Infof("[Informer-updateQJ] '%s/%s' ignored OutOfOrder arrival &newQJ=%p newQJ=%+v", newQJ.Namespace, newQJ.Name, newQJ, newQJ) + if (oldQJ.Name == newQJ.Name) && (larger(oldQJ.ResourceVersion, newQJ.ResourceVersion)) { + klog.V(10).Infof("[Informer-updateQJ] %s ignored OutOfOrder arrival &oldQJ=%p oldQJ=%+v", oldQJ.Name, oldQJ, oldQJ) + klog.V(10).Infof("[Informer-updateQJ] %s ignored OutOfOrder arrival &newQJ=%p newQJ=%+v", newQJ.Name, newQJ, newQJ) return } if equality.Semantic.DeepEqual(newQJ.Status, oldQJ.Status) { - klog.V(6).Infof("[Informer-updateQJ] No change to status field of AppWrapper: '%s/%s', oldAW=%+v, newAW=%+v.", newQJ.Namespace, newQJ.Name, oldQJ.Status, newQJ.Status) + klog.V(10).Infof("[Informer-updateQJ] No change to status field of AppWrapper: %s, oldAW=%+v, newAW=%+v.", newQJ.Name, oldQJ.Status, newQJ.Status) } - klog.V(6).Infof("[Informer-updateQJ] '%s/%s' *Delay=%.6f seconds normal enqueue Version=%s Status=%v", newQJ.Namespace, newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ.ResourceVersion, newQJ.Status) + klog.V(3).Infof("[Informer-updateQJ] %s *Delay=%.6f seconds normal enqueue &newQJ=%p Version=%s Status=%+v", newQJ.Name, time.Now().Sub(newQJ.Status.ControllerFirstTimestamp.Time).Seconds(), newQJ, newQJ.ResourceVersion, newQJ.Status) cc.enqueue(newQJ) } @@ -1882,7 +1838,7 @@ func (cc *XController) worker() { klog.Errorf("[worker] Panic occurred error: %v, stacktrace: %s", pErr, string(debug.Stack())) } }() - item, err := cc.eventQueue.Pop(func(obj interface{}) error { + if _, err := cc.eventQueue.Pop(func(obj interface{}) error { var queuejob *arbv1.AppWrapper switch v := obj.(type) { case *arbv1.AppWrapper: @@ -1891,11 +1847,11 @@ func (cc *XController) worker() { klog.Errorf("[worker] eventQueue.Pop un-supported type. obj=%+v", obj) return nil } - klog.V(10).Infof("[worker] '%s/%s' *Delay=%.6f seconds eventQueue.Pop_begin &newQJ=%p Version=%s Status=%+v", queuejob.Namespace, queuejob.Name, time.Now().Sub(queuejob.Status.ControllerFirstTimestamp.Time).Seconds(), queuejob, queuejob.ResourceVersion, queuejob.Status) + klog.V(10).Infof("[worker] %s *Delay=%.6f seconds eventQueue.Pop_begin &newQJ=%p Version=%s Status=%+v", queuejob.Name, time.Now().Sub(queuejob.Status.ControllerFirstTimestamp.Time).Seconds(), queuejob, queuejob.ResourceVersion, queuejob.Status) if queuejob == nil { if acc, err := meta.Accessor(obj); err != nil { - klog.Warningf("[worker] Failed to get AppWrapper for '%s/%s'", acc.GetNamespace(), acc.GetName()) + klog.Warningf("[worker] Failed to get AppWrapper for %v/%v", acc.GetNamespace(), acc.GetName()) } return nil @@ -1903,69 +1859,45 @@ func (cc *XController) worker() { // sync AppWrapper if err := cc.syncQueueJob(queuejob); err != nil { - klog.Errorf("[worker] Failed to sync AppWrapper '%s/%s', err %#v", queuejob.Namespace, queuejob.Name, err) + klog.Errorf("[worker] Failed to sync AppWrapper %s, err %#v", queuejob.Name, err) // If any error, requeue it. 
return err } klog.V(10).Infof("[worker] Ending %s Delay=%.6f seconds &newQJ=%p Version=%s Status=%+v", queuejob.Name, time.Now().Sub(queuejob.Status.ControllerFirstTimestamp.Time).Seconds(), queuejob, queuejob.ResourceVersion, queuejob.Status) return nil - }) - if err != nil { - klog.Warningf("[worker] Fail to process item from eventQueue, err %v. Attempting to re-enqueque...", err) - if err00 := cc.enqueueIfNotPresent(item); err00 != nil { - klog.Errorf("[worker] Fatal error railed to re-enqueue item, err %v", err00) - } else { - klog.Warning("[worker] Item re-enqueued") - } + }); err != nil { + klog.Errorf("[worker] Fail to pop item from eventQueue, err %#v", err) return } } func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { - // validate that app wraper has not been marked for deletion by the infomer's delete handler - if qj.DeletionTimestamp != nil { - klog.V(3).Infof("[syncQueueJob] AW job=%s/%s set for deletion.", qj.Namespace, qj.Name) - // cleanup resources for running job, ignoring errors - if err00 := cc.Cleanup(qj); err00 != nil { - klog.Warningf("Failed to cleanup resources for app wrapper '%s/%s', err = %v", qj.Namespace, qj.Name, err00) - } - //empty finalizers and delete the queuejob again - if accessor, err00 := meta.Accessor(qj); err00 == nil { - accessor.SetFinalizers(nil) - } - // we delete the job from the queue if it is there, ignoring errors - cc.qjqueue.Delete(qj) - klog.V(3).Infof("[syncQueueJob] AW job=%s/%s deleted.", qj.Namespace, qj.Name) - return nil - } - cacheAWJob, err := cc.getAppWrapper(qj.Namespace, qj.Name, "[syncQueueJob] get fresh appwrapper ") + cacheAWJob, err := cc.queueJobLister.AppWrappers(qj.Namespace).Get(qj.Name) if err != nil { + klog.V(10).Infof("[syncQueueJob] AppWrapper %s not found in cache: info=%+v", qj.Name, err) + // Implicit detection of deletion if apierrors.IsNotFound(err) { - klog.Warningf("[syncQueueJob] AppWrapper '%s/%s' not found in cache and will be deleted", qj.Namespace, qj.Name) - // clean up app wrapper resources including quota - if err := cc.Cleanup(qj); err != nil { - klog.Errorf("Failed to delete resources associated with app wrapper: '%s/%s', err %v", qj.Namespace, qj.Name, err) - // return error so operation can be retried - return err - } + //if (cc.isDispatcher) { + cc.Cleanup(qj) cc.qjqueue.Delete(qj) + //} return nil } return err } - klog.V(10).Infof("[syncQueueJob] Cache AW '%s/%s' &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, qj, qj.ResourceVersion, qj.Status) + klog.V(10).Infof("[syncQueueJob] Cache AW %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) // make sure qj has the latest information - if larger(cacheAWJob.ResourceVersion, qj.ResourceVersion) { - klog.V(5).Infof("[syncQueueJob] '%s/%s' found more recent copy from cache &qj=%p qj=%+v", qj.Namespace, qj.Name, qj, qj) - klog.V(5).Infof("[syncQueueJob] '%s/%s' found more recent copy from cache &cacheAWJob=%p cacheAWJob=%+v", cacheAWJob.Namespace, cacheAWJob.Name, cacheAWJob, cacheAWJob) + if larger(qj.ResourceVersion, qj.ResourceVersion) { + klog.V(10).Infof("[syncQueueJob] %s found more recent copy from cache &qj=%p qj=%+v", qj.Name, qj, qj) + klog.V(10).Infof("[syncQueueJobJ] %s found more recent copy from cache &cacheAWJob=%p cacheAWJob=%+v", cacheAWJob.Name, cacheAWJob, cacheAWJob) cacheAWJob.DeepCopyInto(qj) } // If it is Agent (not a dispatcher), update pod information podPhaseChanges := false - if !cc.isDispatcher { // agent mode + if !cc.isDispatcher { //Make a copy first to not update cache 
object and to use for comparing awNew := qj.DeepCopy() // we call sync to update pods running, pending,... @@ -1973,10 +1905,8 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { err := cc.qjobResControls[arbv1.ResourceTypePod].UpdateQueueJobStatus(awNew) if err != nil { klog.Errorf("[syncQueueJob] Error updating pod status counts for AppWrapper job: %s, err=%+v", qj.Name, err) - return err } - klog.Infof("[syncQueueJob] Pod counts updated for app wrapper '%s/%s' Version=%s Status.CanRun=%t Status.State=%s, pod counts [Pending: %d, Running: %d, Succeded: %d, Failed %d]", awNew.Namespace, awNew.Name, awNew.ResourceVersion, - awNew.Status.CanRun, awNew.Status.State, awNew.Status.Pending, awNew.Status.Running, awNew.Status.Succeeded, awNew.Status.Failed) + klog.V(10).Infof("[syncQueueJob] AW popped from event queue %s &qj=%p Version=%s Status=%+v", awNew.Name, awNew, awNew.ResourceVersion, awNew.Status) // Update etcd conditions if AppWrapper Job has at least 1 running pod and transitioning from dispatched to running. if (awNew.Status.QueueJobState != arbv1.AppWrapperCondRunning) && (awNew.Status.Running > 0) { @@ -1984,12 +1914,7 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondRunning, v1.ConditionTrue, "PodsRunning", "") awNew.Status.Conditions = append(awNew.Status.Conditions, cond) awNew.Status.FilterIgnore = true // Update AppWrapperCondRunning - err := cc.updateStatusInEtcdWithRetry(awNew, "[syncQueueJob] Update pod counts") - if err != nil { - klog.Error("[syncQueueJob] Error updating pod status counts for app wrapper job: '%s/%s', err=%+v.", qj.Namespace, qj.Name, err) - return err - } - return nil + cc.updateEtcd(awNew, "[syncQueueJob] setRunning") } //For debugging? @@ -1998,31 +1923,57 @@ func (cc *XController) syncQueueJob(qj *arbv1.AppWrapper) error { // Using DeepCopy before DeepCopyInto as it seems that DeepCopyInto does not alloc a new memory object awNewStatus := awNew.Status.DeepCopy() awNewStatus.DeepCopyInto(&qj.Status) - klog.V(4).Infof("[syncQueueJob] AW pod phase change(s) detected '%s/%s' &eventqueueaw=%p eventqueueawVersion=%s eventqueueawStatus=%+v; &newaw=%p newawVersion=%s newawStatus=%+v", - qj.Namespace, qj.Name, qj, qj.ResourceVersion, qj.Status, awNew, awNew.ResourceVersion, awNew.Status) + //awNew.Status.DeepCopy().DeepCopyInto(&qj.Status) + klog.V(10).Infof("[syncQueueJob] AW pod phase change(s) detected %s &eventqueueaw=%p eventqueueawVersion=%s eventqueueawStatus=%+v; &newaw=%p newawVersion=%s newawStatus=%+v", + qj.Name, qj, qj.ResourceVersion, qj.Status, awNew, awNew.ResourceVersion, awNew.Status) } } } - err = cc.manageQueueJob(qj, podPhaseChanges) - return err + return cc.manageQueueJob(qj, podPhaseChanges) + //return cc.manageQueueJob(cacheAWJob) } // manageQueueJob is the core method responsible for managing the number of running // pods according to what is specified in the job.Spec. // Does NOT modify . 
func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool) error { + var err error + startTime := time.Now() + defer func() { + klog.V(10).Infof("[worker-manageQJ] Ending %s manageQJ time=%s &qj=%p Version=%s Status=%+v", qj.Name, time.Now().Sub(startTime), qj, qj.ResourceVersion, qj.Status) + }() if !cc.isDispatcher { // Agent Mode + + if qj.DeletionTimestamp != nil { + + klog.V(4).Infof("[manageQueueJob] AW job=%s/%s set for deletion.", qj.Name, qj.Namespace) + + // cleanup resources for running job + err = cc.Cleanup(qj) + if err != nil { + return err + } + //empty finalizers and delete the queuejob again + accessor, err := meta.Accessor(qj) + if err != nil { + return err + } + accessor.SetFinalizers(nil) + + // we delete the job from the queue if it is there + cc.qjqueue.Delete(qj) + + return nil + } //Job is Complete only update pods if needed. if qj.Status.State == arbv1.AppWrapperStateCompleted || qj.Status.State == arbv1.AppWrapperStateRunningHoldCompletion { if podPhaseChanges { // Only update etcd if AW status has changed. This can happen for periodic // updates of pod phase counts done in caller of this function. - err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - podPhaseChanges") - if err != nil { - klog.Errorf("[manageQueueJob] Error updating status for podPhaseChanges for AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) - return err + if err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { + klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) } } return nil @@ -2036,56 +1987,53 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool // If this the first time seeing this AW, no need to delete. 
stateLen := len(qj.Status.State) if stateLen > 0 { - klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job '%s/%s' because it was preempted, status.CanRun=%t, status.State=%s", qj.Namespace, qj.Name, qj.Status.CanRun, qj.Status.State) - err00 := cc.Cleanup(qj) - if err00 != nil { - klog.Errorf("[manageQueueJob] Failed to delete resources for AppWrapper Job '%s/%s', err=%v", qj.Namespace, qj.Name, err00) - return err00 + klog.V(2).Infof("[manageQueueJob] Deleting resources for AppWrapper Job %s because it was preempted, status=%+v\n", qj.Name, qj.Status) + err = cc.Cleanup(qj) + klog.V(8).Infof("[manageQueueJob] Validation after deleting resources for AppWrapper Job %s because it was be preempted, status=%+v\n", qj.Name, qj.Status) + if err != nil { + klog.Errorf("[manageQueueJob] Fail to delete resources for AppWrapper Job %s, err=%#v", qj.Name, err) + return err } - klog.V(2).Infof("[manageQueueJob] Delete resources for AppWrapper Job '%s/%s' due to preemption was sucessfull, status.CanRun=%t, status.State=%s", qj.Namespace, qj.Name, qj.Status.CanRun, qj.Status.State) } qj.Status.State = arbv1.AppWrapperStateEnqueued // add qj to qjqueue only when it is not in UnschedulableQ if cc.qjqueue.IfExistUnschedulableQ(qj) { - klog.V(10).Infof("[manageQueueJob] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - return nil - } - - klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondQueueing, "AwaitingHeadOfLine") - if index < 0 { - qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") - qj.Status.Conditions = append(qj.Status.Conditions, cond) + klog.V(10).Infof("[worker-manageQJ] leaving %s to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } else { - cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") - qj.Status.Conditions[index] = *cond.DeepCopy() - } + klog.V(10).Infof("[worker-manageQJ] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + index := getIndexOfMatchedCondition(qj, arbv1.AppWrapperCondQueueing, "AwaitingHeadOfLine") + if index < 0 { + qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") + qj.Status.Conditions = append(qj.Status.Conditions, cond) + } else { + cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondQueueing, v1.ConditionTrue, "AwaitingHeadOfLine", "") + qj.Status.Conditions[index] = *cond.DeepCopy() + } - qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext - err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - setQueueing") - if err != nil { - klog.Errorf("[manageQueueJob] Error updating status 'setQueueing' AppWrapper: '%s/%s',Status=%+v, 
err=%+v.", qj.Namespace, qj.Name, qj.Status, err) - return err - } - klog.V(10).Infof("[manageQueueJob] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) - if err00 := cc.qjqueue.AddIfNotPresent(qj); err00 != nil { - klog.Errorf("manageQueueJob] Failed to add '%s/%s' to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", - qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err00) - cc.enqueue(qj) - } else { - klog.V(3).Infof("[manageQueueJob] Added '%s/%s' to activeQueue queue 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext + cc.updateEtcd(qj, "manageQueueJob - setQueueing") + klog.V(10).Infof("[worker-manageQJ] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + if err = cc.qjqueue.AddIfNotPresent(qj); err != nil { + klog.Errorf("[worker-manageQJ] Fail to add %s to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", + qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err) + cc.enqueue(qj) + } else { + klog.V(3).Infof("[worker-manageQJ] %s 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + } } return nil - } - // Handle recovery condition - if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { - // One more check to ensure AW is not the current active scheduled object - if !cc.IsActiveAppWrapper(qj.Name, qj.Namespace) { + } // End of first execution of qj to add to qjqueue for ScheduleNext + + //Handle recovery condition + if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && + !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { + // One more check to ensure AW is not the current active schedule object + if cc.schedulingAW.IsActiveAppWrapper(qj.Name, qj.Namespace) { cc.qjqueue.AddIfNotPresent(qj) - klog.V(6).Infof("[manageQueueJob] Recovered AppWrapper '%s/%s' - added to active queue, Status=%+v", + klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper %s%s - added to active queue, Status=%+v", qj.Namespace, qj.Name, qj.Status) return nil } @@ -2099,50 +2047,42 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Status.PendingPodConditions = nil qj.Status.State = arbv1.AppWrapperStateActive + // Bugfix to eliminate performance problem of overloading the event queue.} + if qj.Spec.AggrResources.Items != nil { for i := range qj.Spec.AggrResources.Items { - err00 := cc.refManager.AddTag(&qj.Spec.AggrResources.Items[i], func() string { + err := 
cc.refManager.AddTag(&qj.Spec.AggrResources.Items[i], func() string { return strconv.Itoa(i) }) - if err00 != nil { - klog.Warningf("Failed to add tag to aggregate resource item %s of app apprapper '%s/%s', err = %v", qj.Spec.AggrResources.Items[i].Name, qj.Namespace, qj.Name, err00) + if err != nil { + return err } } } - klog.V(4).Infof("[manageQueueJob] App wrapper '%s/%s' BeforeDispatchingToEtcd Version=%s Status=%+v", qj.Namespace, qj.Name, qj.ResourceVersion, qj.Status) + klog.V(3).Infof("[worker-manageQJ] %s 3Delay=%.6f seconds BeforeDispatchingToEtcd Version=%s Status=%+v", + qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.ResourceVersion, qj.Status) dispatched := true dispatchFailureReason := "ItemCreationFailure." dispatchFailureMessage := "" for _, ar := range qj.Spec.AggrResources.Items { - klog.V(10).Infof("[manageQueueJob] before dispatch [%v].SyncQueueJob %s Version=%s Status.CanRun=%t, Status.State=%s", ar.Type, qj.Name, qj.ResourceVersion, qj.Status.CanRun, qj.Status.State) + klog.V(10).Infof("[worker-manageQJ] before dispatch [%v].SyncQueueJob %s &qj=%p Version=%s Status=%+v", ar.Type, qj.Name, qj, qj.ResourceVersion, qj.Status) // Call Resource Controller of ar.Type to issue REST call to Etcd for resource creation err00 := cc.qjobResControls[ar.Type].SyncQueueJob(qj, &ar) if err00 != nil { - if apierrors.IsInvalid(err00) { - klog.Warningf("[manageQueueJob] Invalid item sent for dispatching by app wrapper='%s/%s' type=%v", qj.Namespace, qj.Name, ar.Type) - } else { - klog.Errorf("[manageQueueJob] Error dispatching item for app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, ar.Type, err00) - } dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) + klog.V(3).Infof("[worker-manageQJ] Error dispatching job=%s type=%v Status=%+v err=%+v", qj.Name, ar.Type, qj.Status, err00) dispatched = false break } } - if dispatched { - // Handle generic resources - for _, ar := range qj.Spec.AggrResources.GenericItems { - klog.V(10).Infof("[manageQueueJob] before dispatch Generic.SyncQueueJob %s Version=%sStatus.CanRun=%t, Status.State=%s", qj.Name, qj.ResourceVersion, qj.Status.CanRun, qj.Status.State) - _, err00 := cc.genericresources.SyncQueueJob(qj, &ar) - if err00 != nil { - if apierrors.IsInvalid(err00) { - klog.Warningf("[manageQueueJob] Invalid generic item sent for dispatching by app wrapper='%s/%s' err=%v", qj.Namespace, qj.Name, err00) - } else { - klog.Errorf("[manageQueueJob] Error dispatching generic item for app wrapper='%s/%s' type=%v err=%v", qj.Namespace, qj.Name, err00) - } - dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) - klog.Errorf("[manageQueueJob] Error dispatching job=%s Status=%+v err=%+v", qj.Name, qj.Status, err00) - dispatched = false - } + // Handle generic resources + for _, ar := range qj.Spec.AggrResources.GenericItems { + klog.V(10).Infof("[worker-manageQJ] before dispatch Generic.SyncQueueJob %s &qj=%p Version=%s Status=%+v", qj.Name, qj, qj.ResourceVersion, qj.Status) + _, err00 := cc.genericresources.SyncQueueJob(qj, &ar) + if err00 != nil { + dispatchFailureMessage = fmt.Sprintf("%s/%s creation failure: %+v", qj.Namespace, qj.Name, err00) + klog.Errorf("[worker-manageQJ] Error dispatching job=%s Status=%+v err=%+v", qj.Name, qj.Status, err00) + dispatched = false } } @@ -2157,43 +2097,30 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Status.Conditions[index] = *cond.DeepCopy() } - 
klog.V(4).Infof("[manageQueueJob] App wrapper '%s/%s' after DispatchingToEtcd Version=%s Status=%+v", qj.Namespace, qj.Name, qj.ResourceVersion, qj.Status) + klog.V(3).Infof("[worker-manageQJ] %s 4Delay=%.6f seconds AllResourceDispatchedToEtcd Version=%s Status=%+v", + qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), qj.ResourceVersion, qj.Status) } else { - klog.V(4).Infof("[manageQueueJob] App wrapper '%s/%s' failed dispatching Version=%s Status=%+v", qj.Namespace, qj.Name, qj.ResourceVersion, qj.Status) - qj.Status.State = arbv1.AppWrapperStateFailed qj.Status.QueueJobState = arbv1.AppWrapperCondFailed if !isLastConditionDuplicate(qj, arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) { cond := GenerateAppWrapperCondition(arbv1.AppWrapperCondFailed, v1.ConditionTrue, dispatchFailureReason, dispatchFailureMessage) qj.Status.Conditions = append(qj.Status.Conditions, cond) } - // clean up app wrapper resources including quota - if err00 := cc.Cleanup(qj); err00 != nil { - klog.Errorf("Failed to delete resources associated with app wrapper: '%s/%s', err %v", qj.Namespace, qj.Name, err00) - // return error so operation can be retried - return err00 - } - cc.qjqueue.Delete(qj) + cc.Cleanup(qj) } + // TODO(k82cn): replaced it with `UpdateStatus` qj.Status.FilterIgnore = true // update State & QueueJobState after dispatch - err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - afterEtcdDispatching") - if err != nil { - klog.Errorf("[manageQueueJob] Error updating status 'afterEtcdDispatching' for AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) + if err := cc.updateEtcd(qj, "manageQueueJob - afterEtcdDispatching"); err != nil { + klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) return err } - return nil - } else if qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateActive { - klog.Infof("[manageQueueJob] Getting completion status for app wrapper '%s/%s' Version=%s Status.CanRun=%t Status.State=%s, pod counts [Pending: %d, Running: %d, Succeded: %d, Failed %d]", qj.Namespace, qj.Name, qj.ResourceVersion, - qj.Status.CanRun, qj.Status.State, qj.Status.Pending, qj.Status.Running, qj.Status.Succeeded, qj.Status.Failed) + } else if qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateActive { //set appwrapper status to Complete or RunningHoldCompletion derivedAwStatus := cc.getAppWrapperCompletionStatus(qj) - klog.Infof("[manageQueueJob] Got completion status '%s' for app wrapper '%s/%s' Version=%s Status.CanRun=%t Status.State=%s, pod counts [Pending: %d, Running: %d, Succeded: %d, Failed %d]", derivedAwStatus, qj.Namespace, qj.Name, qj.ResourceVersion, - qj.Status.CanRun, qj.Status.State, qj.Status.Pending, qj.Status.Running, qj.Status.Succeeded, qj.Status.Failed) - //Set Appwrapper state to complete if all items in Appwrapper //are completed if derivedAwStatus == arbv1.AppWrapperStateRunningHoldCompletion { @@ -2211,11 +2138,7 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Status.Conditions[index] = *cond.DeepCopy() updateQj = qj.DeepCopy() } - err := cc.updateStatusInEtcdWithRetry(updateQj, "[manageQueueJob] setRunningHoldCompletion") - if err != nil { - klog.Errorf("[manageQueueJob] Error updating status 'setRunningHoldCompletion' for AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) - return err - } + cc.updateEtcd(updateQj, "[syncQueueJob] 
setRunningHoldCompletion") } //Set appwrapper status to complete if derivedAwStatus == arbv1.AppWrapperStateCompleted { @@ -2234,134 +2157,146 @@ func (cc *XController) manageQueueJob(qj *arbv1.AppWrapper, podPhaseChanges bool qj.Status.Conditions[index] = *cond.DeepCopy() updateQj = qj.DeepCopy() } - err := cc.updateStatusInEtcdWithRetry(updateQj, "[manageQueueJob] setCompleted") - if err != nil { - if cc.quotaManager != nil { - cc.quotaManager.Release(updateQj) - } - klog.Errorf("[manageQueueJob] Error updating status 'setCompleted' AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) - return err - } - if cc.quotaManager != nil { + cc.updateEtcd(updateQj, "[syncQueueJob] setCompleted") + if cc.serverOption.QuotaEnabled { cc.quotaManager.Release(updateQj) } } - klog.Infof("[manageQueueJob] Done getting completion status for app wrapper '%s/%s' Version=%s Status.CanRun=%t Status.State=%s, pod counts [Pending: %d, Running: %d, Succeded: %d, Failed %d]", qj.Namespace, qj.Name, qj.ResourceVersion, - qj.Status.CanRun, qj.Status.State, qj.Status.Pending, qj.Status.Running, qj.Status.Succeeded, qj.Status.Failed) + // Bugfix to eliminate performance problem of overloading the event queue. } else if podPhaseChanges { // Continued bug fix // Only update etcd if AW status has changed. This can happen for periodic // updates of pod phase counts done in caller of this function. - err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - podPhaseChanges") + if err := cc.updateEtcd(qj, "manageQueueJob - podPhaseChanges"); err != nil { + klog.Errorf("[manageQueueJob] Error updating etc for AW job=%s Status=%+v err=%+v", qj.Name, qj.Status, err) + } + } + // Finish adding qj to Etcd for dispatch + + } else { // Dispatcher Mode + + if qj.DeletionTimestamp != nil { + // cleanup resources for running job + err = cc.Cleanup(qj) if err != nil { - klog.Errorf("[manageQueueJob] Error updating status 'podPhaseChanges' AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) return err } + //empty finalizers and delete the queuejob again + accessor, err := meta.Accessor(qj) + if err != nil { + return err + } + accessor.SetFinalizers(nil) + + cc.qjqueue.Delete(qj) + + return nil } - return nil - } else { // Dispatcher Mode if !qj.Status.CanRun && (qj.Status.State != arbv1.AppWrapperStateEnqueued && qj.Status.State != arbv1.AppWrapperStateDeleted) { // if there are running resources for this job then delete them because the job was put in // pending state... - klog.V(3).Infof("[manageQueueJob] [Dispatcher] Deleting AppWrapper resources because it will be preempted! %s", qj.Name) - err00 := cc.Cleanup(qj) - if err00 != nil { - klog.Errorf("Failed to clean up resources for app wrapper '%s/%s', err =%v", qj.Namespace, qj.Name, err00) - return err00 + klog.V(3).Infof("[worker-manageQJ] Deleting AppWrapper resources because it will be preempted! 
%s", qj.Name) + err = cc.Cleanup(qj) + if err != nil { + return err } qj.Status.State = arbv1.AppWrapperStateEnqueued if cc.qjqueue.IfExistUnschedulableQ(qj) { - klog.V(10).Infof("[manageQueueJob] [Dispatcher] leaving '%s/%s' to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(10).Infof("[worker-manageQJ] leaving %s to qjqueue.UnschedulableQ activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } else { - klog.V(10).Infof("[manageQueueJob] [Dispatcher] before add to activeQ '%s/%s' activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(10).Infof("[worker-manageQJ] before add to activeQ %s activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) qj.Status.QueueJobState = arbv1.AppWrapperCondQueueing qj.Status.FilterIgnore = true // Update Queueing status, add to qjqueue for ScheduleNext - err := cc.updateStatusInEtcdWithRetry(qj, "manageQueueJob - setQueueing") - if err != nil { - klog.Errorf("[manageQueueJob] Error updating status 'setQueueing' for AppWrapper: '%s/%s',Status=%+v, err=%+v.", qj.Namespace, qj.Name, qj.Status, err) - return err - } - if err00 = cc.qjqueue.AddIfNotPresent(qj); err00 != nil { - klog.Errorf("[manageQueueJob] [Dispatcher] Fail to add '%s/%s' to activeQueue. Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", - qj.Namespace, qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err00) + cc.updateEtcd(qj, "manageQueueJob - setQueueing") + if err = cc.qjqueue.AddIfNotPresent(qj); err != nil { + klog.Errorf("[worker-manageQJ] Fail to add %s to activeQueue. 
Back to eventQueue activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v err=%#v", + qj.Name, cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status, err) cc.enqueue(qj) } else { - klog.V(4).Infof("[manageQueueJob] [Dispatcher] '%s/%s' 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", - qj.Namespace, qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) + klog.V(4).Infof("[worker-manageQJ] %s 1Delay=%.6f seconds activeQ.Add_success activeQ=%t Unsched=%t &qj=%p Version=%s Status=%+v", + qj.Name, time.Now().Sub(qj.Status.ControllerFirstTimestamp.Time).Seconds(), cc.qjqueue.IfExistActiveQ(qj), cc.qjqueue.IfExistUnschedulableQ(qj), qj, qj.ResourceVersion, qj.Status) } } + + //_, err = cc.arbclients.ArbV1().AppWrappers(qj.Namespace).Update(qj) + //if err != nil { + // return err + //} return nil } + + // if !qj.Status.CanRun && qj.Status.State == arbv1.QueueJobStateEnqueued { if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued { cc.qjqueue.AddIfNotPresent(qj) return nil } + if qj.Status.CanRun && !qj.Status.IsDispatched { if klog.V(10).Enabled() { current_time := time.Now() - klog.V(10).Infof("[manageQueueJob] [Dispatcher] XQJ '%s/%s' has Overhead Before Dispatching: %s", qj.Namespace, qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) - klog.V(10).Infof("[manageQueueJob] [Dispatcher] '%s/%s', %s: WorkerBeforeDispatch", qj.Namespace, qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[worker-manageQJ] XQJ %s has Overhead Before Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[TTime] %s, %s: WorkerBeforeDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) } queuejobKey, _ := GetQueueJobKey(qj) + // agentId:=cc.dispatchMap[queuejobKey] + // if agentId!=nil { if agentId, ok := cc.dispatchMap[queuejobKey]; ok { - klog.V(10).Infof("[manageQueueJob] [Dispatcher] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId) + klog.V(10).Infof("[Dispatcher Controller] Dispatched AppWrapper %s to Agent ID: %s.", qj.Name, agentId) cc.agentMap[agentId].CreateJob(qj) qj.Status.IsDispatched = true } else { - klog.Errorf("[manageQueueJob] [Dispatcher] AppWrapper %s not found in dispatcher mapping.", qj.Name) + klog.Errorf("[Dispatcher Controller] AppWrapper %s not found in dispatcher mapping.", qj.Name) } if klog.V(10).Enabled() { current_time := time.Now() - klog.V(10).Infof("[manageQueueJob] [Dispatcher] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) - klog.V(10).Infof("[manageQueueJob] [Dispatcher] %s, %s: WorkerAfterDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[Dispatcher Controller] XQJ %s has Overhead After Dispatching: %s", qj.Name, current_time.Sub(qj.CreationTimestamp.Time)) + klog.V(10).Infof("[TTime] %s, %s: WorkerAfterDispatch", qj.Name, time.Now().Sub(qj.CreationTimestamp.Time)) } - err := cc.updateStatusInEtcdWithRetry(qj, "[manageQueueJob] [Dispatcher] -- set dispatched true") - if err != nil { - klog.Errorf("Failed to update status of AppWrapper %s/%s: err=%v", qj.Namespace, qj.Name, err) + + if _, err := cc.arbclients.ArbV1().AppWrappers(qj.Namespace).Update(qj); err != nil { + klog.Errorf("Failed to update status of AppWrapper %v/%v: %v", + qj.Namespace, qj.Name, err) return err } } - return nil 
+ } + return err } // Cleanup function func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { - klog.V(3).Infof("[Cleanup] begin AppWrapper '%s/%s' Version=%s", appwrapper.Namespace, appwrapper.Name, appwrapper.ResourceVersion) - var err *multierror.Error + klog.V(3).Infof("[Cleanup] begin AppWrapper %s Version=%s Status=%+v\n", appwrapper.Name, appwrapper.ResourceVersion, appwrapper.Status) + if !cc.isDispatcher { if appwrapper.Spec.AggrResources.Items != nil { // we call clean-up for each controller for _, ar := range appwrapper.Spec.AggrResources.Items { err00 := cc.qjobResControls[ar.Type].Cleanup(appwrapper, &ar) - if err00 != nil && !apierrors.IsNotFound(err00) { - klog.Errorf("[Cleanup] Error deleting item %s from app wrapper='%s/%s' err=%v.", - ar.Type, appwrapper.Namespace, appwrapper.Name, err00) - err = multierror.Append(err, err00) - continue + if err00 != nil { + klog.Errorf("[Cleanup] Error deleting item %s from job=%s Status=%+v err=%+v.", + ar.Type, appwrapper.Name, appwrapper.Status, err00) } - klog.V(3).Infof("[Cleanup] Deleted item from app wrapper='%s/%s'", - appwrapper.Namespace, appwrapper.Name) } } if appwrapper.Spec.AggrResources.GenericItems != nil { for _, ar := range appwrapper.Spec.AggrResources.GenericItems { genericResourceName, gvk, err00 := cc.genericresources.Cleanup(appwrapper, &ar) - if err00 != nil && !apierrors.IsNotFound(err00) { - klog.Errorf("[Cleanup] Error deleting generic item %s, from app wrapper='%s/%s' err=%v.", - genericResourceName, appwrapper.Namespace, appwrapper.Name, err00) - err = multierror.Append(err, err00) - continue + if err00 != nil { + klog.Errorf("[Cleanup] Error deleting generic item %s, from app wrapper=%s Status=%+v err=%+v.", + genericResourceName, appwrapper.Name, appwrapper.Status, err00) } - klog.V(3).Infof("[Cleanup] Deleted generic item %s, GVK=%s.%s.%s from app wrapper='%s/%s'", - genericResourceName, gvk.Group, gvk.Version, gvk.Kind, appwrapper.Namespace, appwrapper.Name) + klog.Infof("[Cleanup] Deleted generic item %s, GVK=%s.%s.%s from app wrapper=%s Status=%+v", + genericResourceName, gvk.Group, gvk.Version, gvk.Kind, appwrapper.Name, appwrapper.Status) } } } else { + // klog.Infof("[Dispatcher] Cleanup: State=%s\n", appwrapper.Status.State) + //if !
appwrapper.Status.CanRun && appwrapper.Status.IsDispatched { if appwrapper.Status.IsDispatched { queuejobKey, _ := GetQueueJobKey(appwrapper) if obj, ok := cc.dispatchMap[queuejobKey]; ok { @@ -2379,67 +2314,7 @@ func (cc *XController) Cleanup(appwrapper *arbv1.AppWrapper) error { appwrapper.Status.Running = 0 appwrapper.Status.Succeeded = 0 appwrapper.Status.Failed = 0 - klog.V(3).Infof("[Cleanup] end AppWrapper '%s/%s' Version=%s", appwrapper.Namespace, appwrapper.Name, appwrapper.ResourceVersion) - - return err.ErrorOrNil() -} -func (cc *XController) getAppWrapper(namespace string, name string, caller string) (*arbv1.AppWrapper, error) { - klog.V(5).Infof("[getAppWrapper] getting a copy of '%s/%s' when called by '%s'.", namespace, name, caller) - apiCacheAWJob, err := cc.queueJobLister.AppWrappers(namespace).Get(name) - if err != nil { - if !apierrors.IsNotFound(err) { - klog.Errorf("[getAppWrapper] getting a copy of '%s/%s' failed, when called by '%s', err=%v", namespace, name, caller, err) - } - return nil, err - } - klog.V(5).Infof("[getAppWrapper] get a copy of '%s/%s' suceeded when called by '%s'", namespace, name, caller) - return apiCacheAWJob.DeepCopy(), nil -} - -type EtcdErrorClassifier struct { -} - -func (c *EtcdErrorClassifier) Classify(err error) retrier.Action { - if err == nil { - return retrier.Succeed - } else if apierrors.IsConflict(err) { - return retrier.Retry - } else { - return retrier.Fail - } -} + klog.V(10).Infof("[Cleanup] end AppWrapper %s Version=%s Status=%+v\n", appwrapper.Name, appwrapper.ResourceVersion, appwrapper.Status) -// IsActiveAppWrapper safely performs the comparison that was done inside the if block -// at line 1977 in the queuejob_controller_ex.go -// The code looked like this: -// -// if !qj.Status.CanRun && qj.Status.State == arbv1.AppWrapperStateEnqueued && -// !cc.qjqueue.IfExistUnschedulableQ(qj) && !cc.qjqueue.IfExistActiveQ(qj) { -// // One more check to ensure AW is not the current active schedule object -// if cc.schedulingAW == nil || -// (strings.Compare(cc.schedulingAW.Namespace, qj.Namespace) != 0 && -// strings.Compare(cc.schedulingAW.Name, qj.Name) != 0) { -// cc.qjqueue.AddIfNotPresent(qj) -// klog.V(3).Infof("[manageQueueJob] Recovered AppWrapper %s%s - added to active queue, Status=%+v", -// qj.Namespace, qj.Name, qj.Status) -// return nil -// } -// } - -func (cc *XController) IsActiveAppWrapper(name, namespace string) bool { - cc.schedulingMutex.RLock() - defer cc.schedulingMutex.RUnlock() - if cc.schedulingAW == nil { - klog.V(6).Info("[IsActiveAppWrapper] No active scheduling app wrapper set") - } else { - klog.V(6).Infof("[IsActiveAppWrapper] Active scheduling app wrapper is : '%s/%s'", cc.schedulingAW.Namespace, cc.schedulingAW.Name) - } - return cc.schedulingAW != nil && - (strings.Compare(cc.schedulingAW.Namespace, namespace) != 0 && - strings.Compare(cc.schedulingAW.Name, name) != 0) -} -func (qjm *XController) schedulingAWAtomicSet(qj *arbv1.AppWrapper) { - qjm.schedulingMutex.Lock() - qjm.schedulingAW = qj - qjm.schedulingMutex.Unlock() + return nil } diff --git a/pkg/controller/queuejobresources/genericresource/genericresource.go b/pkg/controller/queuejobresources/genericresource/genericresource.go index e45e380df..4b2879305 100644 --- a/pkg/controller/queuejobresources/genericresource/genericresource.go +++ b/pkg/controller/queuejobresources/genericresource/genericresource.go @@ -5,7 +5,7 @@ Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the 
License. You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, @@ -173,10 +173,12 @@ func (gr *GenericResources) Cleanup(aw *arbv1.AppWrapper, awr *arbv1.AppWrapperG err = deleteObject(namespaced, namespace, newName, rsrc, dclient) if err != nil { - if !errors.IsNotFound(err) { + if errors.IsAlreadyExists(err) { + klog.V(4).Infof("%v\n", err.Error()) + } else { klog.Errorf("[Cleanup] Error deleting the object `%v`, the error is `%v`.", newName, errors.ReasonForError(err)) + return name, gvk, err } - return name, gvk, err } } else { klog.Warningf("[Cleanup] %s/%s not found using label selector: %s.\n", name, namespace, labelSelector) @@ -335,7 +337,7 @@ func (gr *GenericResources) SyncQueueJob(aw *arbv1.AppWrapper, awr *arbv1.AppWra return pods, nil } -// checks if object has pod template spec and add new labels +//checks if object has pod template spec and add new labels func addLabelsToPodTemplateField(unstruct *unstructured.Unstructured, labels map[string]string) (hasFields bool) { spec, isFound, _ := unstructured.NestedMap(unstruct.UnstructuredContent(), "spec") if !isFound { @@ -377,7 +379,7 @@ func addLabelsToPodTemplateField(unstruct *unstructured.Unstructured, labels map return isFound } -// checks if object has replicas and containers field +//checks if object has replicas and containers field func hasFields(obj runtime.RawExtension) (hasFields bool, replica float64, containers []v1.Container) { var unstruct unstructured.Unstructured unstruct.Object = make(map[string]interface{}) @@ -459,7 +461,7 @@ func createObject(namespaced bool, namespace string, name string, rsrc schema.Gr } } -func deleteObject(namespaced bool, namespace string, name string, rsrc schema.GroupVersionResource, dclient dynamic.Interface) error { +func deleteObject(namespaced bool, namespace string, name string, rsrc schema.GroupVersionResource, dclient dynamic.Interface) (erro error) { var err error backGround := metav1.DeletePropagationBackground delOptions := metav1.DeleteOptions{PropagationPolicy: &backGround} @@ -471,7 +473,7 @@ func deleteObject(namespaced bool, namespace string, name string, rsrc schema.Gr err = res.Delete(context.Background(), name, delOptions) } - if err != nil && !errors.IsNotFound(err) { + if err != nil { klog.Errorf("[deleteObject] Error deleting the object `%v`, the error is `%v`.", name, errors.ReasonForError(err)) return err } else { @@ -595,7 +597,7 @@ func getContainerResources(container v1.Container, replicas float64) *clustersta return req } -// returns status of an item present in etcd +//returns status of an item present in etcd func (gr *GenericResources) IsItemCompleted(awgr *arbv1.AppWrapperGenericResource, namespace string, appwrapperName string, genericItemName string) (completed bool) { dd := gr.clients.Discovery() apigroups, err := restmapper.GetAPIGroupResources(dd) diff --git a/test/e2e-kuttl/quota-forest/99-cleanup.yaml b/test/e2e-kuttl/quota-forest/99-cleanup.yaml index 59a37b7d1..3998eb375 100644 --- a/test/e2e-kuttl/quota-forest/99-cleanup.yaml +++ b/test/e2e-kuttl/quota-forest/99-cleanup.yaml @@ -6,4 +6,4 @@ error: [] unitTest: false delete: [] commands: - - command: kubectl delete namespace test --wait + - command: kubectl delete namespace test \ No newline at end of file diff --git a/test/e2e/queue.go b/test/e2e/queue.go index 6b02af1bd..661a024fb 
100644 --- a/test/e2e/queue.go +++ b/test/e2e/queue.go @@ -42,7 +42,6 @@ import ( . "github.com/onsi/gomega" arbv1 "github.com/project-codeflare/multi-cluster-app-dispatcher/pkg/apis/controller/v1beta1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/util/wait" ) var _ = Describe("AppWrapper E2E Test", func() { @@ -97,7 +96,7 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred(), "Ready pods are expected for app wrapper: aw-deployment-2-550cpu") + Expect(err).NotTo(HaveOccurred()) // This should fill up the master node aw2 := createDeploymentAWwith350CPU(context, appendRandomString("aw-deployment-2-350cpu")) @@ -105,8 +104,8 @@ var _ = Describe("AppWrapper E2E Test", func() { // Using quite mode due to creating of pods in earlier step. err = waitAWReadyQuiet(context, aw2) - Expect(err).NotTo(HaveOccurred(), "Ready pods are expected for app wrapper: aw-deployment-2-350cpu") - fmt.Fprintf(os.Stdout, "[e2e] MCAD CPU Accounting Test - Completed. Awaiting app wrapper cleanup...\n") + fmt.Fprintf(os.Stdout, "The error is %v", err) + Expect(err).NotTo(HaveOccurred()) }) It("MCAD CPU Preemption Test", func() { @@ -143,7 +142,7 @@ var _ = Describe("AppWrapper E2E Test", func() { }) It("MCAD CPU Requeuing - Completion After Enough Requeuing Times Test", func() { - fmt.Fprintf(os.Stdout, "[e2e] Completion After Enough Requeuing Times Test - Started.\n") + fmt.Fprintf(os.Stdout, "[e2e] MCAD CPU Requeuing Test - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper @@ -158,8 +157,8 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createJobAWWithInitContainer(context, "aw-job-3-init-container", 60, "exponential", 0) appwrappers = append(appwrappers, aw) - err := waitAWPodsCompleted(context, aw, 12*time.Minute) // This test waits for 12 minutes to make sure all PODs complete - Expect(err).NotTo(HaveOccurred(), "Waiting for the pods to be completed") + err := waitAWPodsCompleted(context, aw, 720*time.Second) // This test waits for 12 minutes to make sure all PODs complete + Expect(err).NotTo(HaveOccurred()) }) It("MCAD CPU Requeuing - Deletion After Maximum Requeuing Times Test", func() { @@ -220,7 +219,7 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createDeploymentAW(context, "aw-deployment-3") appwrappers = append(appwrappers, aw) - fmt.Fprintf(GinkgoWriter, "[e2e] Awaiting %d pods running for AW %s.\n", aw.Spec.SchedSpec.MinAvailable, aw.Name) + fmt.Fprintf(os.Stdout, "[e2e] Awaiting %d pods running for AW %s.\n", aw.Spec.SchedSpec.MinAvailable, aw.Name) err := waitAWPodsReady(context, aw) Expect(err).NotTo(HaveOccurred()) }) @@ -368,7 +367,7 @@ var _ = Describe("AppWrapper E2E Test", func() { }) - PIt("Create AppWrapper - Namespace Only - 0 Pods", func() { + It("Create AppWrapper - Namespace Only - 0 Pods", func() { fmt.Fprintf(os.Stdout, "[e2e] Create AppWrapper - Namespace Only - 0 Pods - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper @@ -411,10 +410,10 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) err := waitAWAnyPodsExists(context, aw) - Expect(err).NotTo(HaveOccurred(), "Expecting any pods for app wrapper: aw-deployment-2-550-vs-550-cpu") + Expect(err).NotTo(HaveOccurred()) err = waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-2-550-vs-550-cpu") +
Expect(err).NotTo(HaveOccurred()) }) It("MCAD Scheduling Fail Fast Preemption Test", func() { @@ -430,7 +429,7 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) time.Sleep(1 * time.Minute) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred(), "Expecting pods for app wrapper: aw-deployment-2-550cpu") + Expect(err).NotTo(HaveOccurred()) // This should not fit on any node but should dispatch because there is enough aggregated resources. aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW( @@ -439,13 +438,13 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw2) err = waitAWAnyPodsExists(context, aw2) - Expect(err).NotTo(HaveOccurred(), "Expecting pending pods for app wrapper: aw-ff-deployment-1-850-cpu") + fmt.Fprintf(os.Stdout, "The error is %v", err) + Expect(err).NotTo(HaveOccurred()) err = waitAWPodsPending(context, aw2) - Expect(err).NotTo(HaveOccurred(), "Expecting pending pods (try 2) for app wrapper: aw-ff-deployment-1-850-cpu") - fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Pending pods found for app wrapper aw-ff-deployment-1-850-cpu\n") + Expect(err).NotTo(HaveOccurred()) - // This should fit on cluster after AW aw-deployment-1-850-cpu above is automatically preempted on + // This should fit on cluster after AW aw-deployment-1-850-cpu above is automatically preempted on // scheduling failure aw3 := createGenericDeploymentCustomPodResourcesWithCPUAW( context, appendRandomString("aw-ff-deployment-2-340-cpu"), "340m", "340m", 2, 60) @@ -454,20 +453,15 @@ var _ = Describe("AppWrapper E2E Test", func() { // Wait for pods to get created, assumes preemption around 10 minutes err = waitAWPodsExists(context, aw3, 720000*time.Millisecond) - Expect(err).NotTo(HaveOccurred(), "Expecting pods for app wrapper: aw-ff-deployment-2-340-cpu") - fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Pods not found for app wrapper aw-ff-deployment-2-340-cpu\n") + Expect(err).NotTo(HaveOccurred()) - // Make sure aw2 pods do not exist + // Make sure they are running err = waitAWPodsReady(context, aw3) - Expect(err).NotTo(HaveOccurred(), "Expecting no pods for app wrapper: aw-ff-deployment-2-340-cpu") - fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Ready pods found for app wrapper aw-ff-deployment-2-340-cpu\n") - - // Make sure pods from AW aw-deployment-1-850-cpu above do not exist proving preemption - time.Sleep(5 * time.Minute) + Expect(err).NotTo(HaveOccurred()) + time.Sleep(2 * time.Minute) + // Make sure pods from AW aw-deployment-1-850-cpu above do not exist proving preemption err = waitAWAnyPodsExists(context, aw2) - Expect(err).To(HaveOccurred(), "Expecting no pods for app wrapper : aw-ff-deployment-1-850-cpu") - fmt.Fprintf(os.Stdout, "[e2e] MCAD Scheduling Fail Fast Preemption Test - Completed. Awaiting app wrapper cleanup\n") - + Expect(err).To(HaveOccurred()) }) It("MCAD Bad Custom Pod Resources vs.
Deployment Pod Resource Not Queuing Test", func() { @@ -482,7 +476,7 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-2-550cpu") + Expect(err).NotTo(HaveOccurred()) // This should not fit on cluster but customPodResources is incorrect so AW pods are created aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW( @@ -491,10 +485,11 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw2) err = waitAWAnyPodsExists(context, aw2) - Expect(err).NotTo(HaveOccurred(), "Not expecting pods to exist for app wrapper: aw-deployment-2-425-vs-426-cpu") + fmt.Fprintf(os.Stdout, "the error is %v", err) + Expect(err).NotTo(HaveOccurred()) err = waitAWPodsReady(context, aw2) - Expect(err).To(HaveOccurred(), "Not expecting pods to exist for app wrapper: aw-deployment-2-425-vs-426-cpu") + Expect(err).To(HaveOccurred()) }) It("MCAD Bad Custom Pod Resources vs. Deployment Pod Resource Queuing Test 2", func() { @@ -510,21 +505,21 @@ var _ = Describe("AppWrapper E2E Test", func() { time.Sleep(1 * time.Minute) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred(), "Waiting for pods to be ready for app wrapper: aw-deployment-2-550cpu") + Expect(err).NotTo(HaveOccurred()) // This should fit on cluster but customPodResources is incorrect so AW pods are not created aw2 := createGenericDeploymentCustomPodResourcesWithCPUAW( - context, appendRandomString("aw-deployment-2-427-vs-425-cpu"), "427m", "425m", 2, 60) + context, appendRandomString("aw-deployment-2-426-vs-425-cpu"), "426m", "425m", 2, 60) appwrappers = append(appwrappers, aw2) time.Sleep(1 * time.Minute) err = waitAWAnyPodsExists(context, aw2) - Expect(err).To(HaveOccurred(), "Waiting for no pods to exist for app wrapper: aw-deployment-2-427-vs-425-cpu") + Expect(err).To(HaveOccurred()) }) - It("MCAD app wrapper timeout Test", func() { - fmt.Fprintf(os.Stdout, "[e2e] MCAD app wrapper timeout Test - Started.\n") + It("MCAD appwrapper timeout Test", func() { + fmt.Fprintf(os.Stdout, "[e2e] MCAD appwrapper timeout Test - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper appwrappersPtr := &appwrappers @@ -532,14 +527,20 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createGenericAWTimeoutWithStatus(context, "aw-test-jobtimeout-with-comp-1") err1 := waitAWPodsReady(context, aw) - Expect(err1).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-test-jobtimeout-with-comp-1") + Expect(err1).NotTo(HaveOccurred()) time.Sleep(60 * time.Second) aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) - Expect(err).NotTo(HaveOccurred(), "Expecting no error when getting app wrapper status") - fmt.Fprintf(GinkgoWriter, "[e2e] status of app wrapper: %v.\n", aw1.Status) - Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateFailed), "Expecting a failed state") + if err != nil { + fmt.Fprintf(GinkgoWriter, "Error getting status") + } + pass := false + fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) + if aw1.Status.State == arbv1.AppWrapperStateFailed { + pass = true + } + Expect(pass).To(BeTrue()) appwrappers = append(appwrappers, aw) - fmt.Fprintf(os.Stdout, "[e2e] MCAD app wrapper timeout Test - Completed.\n") + fmt.Fprintf(os.Stdout, "[e2e] MCAD appwrapper timeout Test - Completed.\n") }) It("MCAD Job Completion Test", func() { @@ 
-555,13 +556,17 @@ var _ = Describe("AppWrapper E2E Test", func() { time.Sleep(1 * time.Minute) aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) if err != nil { - fmt.Fprintf(GinkgoWriter, "Error getting status, %v\n", err) + fmt.Fprint(GinkgoWriter, "Error getting status") } - Expect(err).Should(Succeed()) - Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateCompleted)) + pass := false fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) + if aw1.Status.State == arbv1.AppWrapperStateCompleted { + pass = true + } + Expect(pass).To(BeTrue()) appwrappers = append(appwrappers, aw) fmt.Fprintf(os.Stdout, "[e2e] MCAD Job Completion Test - Completed.\n") + }) It("MCAD Multi-Item Job Completion Test", func() { @@ -573,14 +578,21 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createGenericJobAWWithMultipleStatus(context, "aw-test-job-with-comp-ms-21") err1 := waitAWPodsReady(context, aw) - Expect(err1).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: 'aw-test-job-with-comp-ms-21'") + Expect(err1).NotTo(HaveOccurred()) time.Sleep(1 * time.Minute) aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) - Expect(err).NotTo(HaveOccurred(), "No error is expected when getting status") - fmt.Fprintf(GinkgoWriter, "[e2e] MCAD Multi-Item Job Completion Test status of AW %v.\n", aw1.Status) - Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateCompleted), "Expecting a completed app wrapper status") + if err != nil { + fmt.Fprint(GinkgoWriter, "Error getting status") + } + pass := false + fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) + if aw1.Status.State == arbv1.AppWrapperStateCompleted { + pass = true + } + Expect(pass).To(BeTrue()) appwrappers = append(appwrappers, aw) fmt.Fprintf(os.Stdout, "[e2e] MCAD Job Completion Test - Completed.\n") + }) It("MCAD GenericItem Without Status Test", func() { @@ -592,9 +604,8 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createAWGenericItemWithoutStatus(context, "aw-test-job-with-comp-44") err1 := waitAWPodsReady(context, aw) - appwrappers = append(appwrappers, aw) - fmt.Fprintf(GinkgoWriter, "The error is: %v", err1) - Expect(err1).To(HaveOccurred(), "Expecting for pods not to be ready for app wrapper: aw-test-job-with-comp-44") + fmt.Fprintf(os.Stdout, "The error is: %v", err1) + Expect(err1).To(HaveOccurred()) fmt.Fprintf(os.Stdout, "[e2e] MCAD GenericItem Without Status Test - Completed.\n") }) @@ -608,14 +619,14 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createGenericJobAWWithScheduleSpec(context, "aw-test-job-with-scheduling-spec") err1 := waitAWPodsReady(context, aw) - Expect(err1).NotTo(HaveOccurred(), "Waiting for pods to be ready") + Expect(err1).NotTo(HaveOccurred()) err2 := waitAWPodsCompleted(context, aw, 90*time.Second) - Expect(err2).NotTo(HaveOccurred(), "Waiting for pods to be completed") + Expect(err2).NotTo(HaveOccurred()) // Once pods are completed, we wait for them to see if they change their status to anything BUT "Completed" // which SHOULD NOT happen because the job is done err3 := waitAWPodsNotCompleted(context, aw) - Expect(err3).To(HaveOccurred(), "Waiting for pods not to be completed") + Expect(err3).To(HaveOccurred()) appwrappers = append(appwrappers, aw) fmt.Fprintf(os.Stdout, "[e2e] MCAD Job Completion No-requeue Test - Completed.\n") @@ -660,37 +671,44 @@ var _ = Describe("AppWrapper E2E Test", func() { appwrappers = append(appwrappers, aw) 
time.Sleep(1 * time.Minute) err := waitAWPodsReady(context, aw) - Expect(err).NotTo(HaveOccurred(), "Waiting for pods to be ready for app wrapper: aw-deployment-2-550cpu") + Expect(err).NotTo(HaveOccurred()) // This should not fit on cluster aw2 := createDeploymentAWwith426CPU(context, appendRandomString("aw-deployment-2-426cpu")) appwrappers = append(appwrappers, aw2) err = waitAWAnyPodsExists(context, aw2) - Expect(err).To(HaveOccurred(), "No pods for app wrapper `aw-deployment-2-426cpu` are expected.") + Expect(err).To(HaveOccurred()) }) - It("MCAD Deployment RunningHoldCompletion Test", func() { - fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RunningHoldCompletion Test - Started.\n") + It("MCAD Deployment RunningHoldCompletion Test", func() { + fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RunningHoldCompletion Test - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper appwrappersPtr := &appwrappers defer cleanupTestObjectsPtr(context, appwrappersPtr) - aw := createGenericDeploymentAWWithMultipleItems(context, "aw-deployment-rhc") - appwrappers = append(appwrappers, aw) - time.Sleep(30 * time.Second) + aw := createGenericDeploymentAWWithMultipleItems(context, appendRandomString("aw-deployment-2-status")) + time.Sleep(1 * time.Minute) err1 := waitAWPodsReady(context, aw) - Expect(err1).NotTo(HaveOccurred(), "Expecting pods to be ready for app wrapper: aw-deployment-rhc") + Expect(err1).NotTo(HaveOccurred()) aw1, err := context.karclient.ArbV1().AppWrappers(aw.Namespace).Get(aw.Name, metav1.GetOptions{}) - Expect(err).NotTo(HaveOccurred(), "Expecting to get app wrapper status") + if err != nil { + fmt.Fprintf(GinkgoWriter, "Error getting status, %v", err) + } + pass := false fmt.Fprintf(GinkgoWriter, "[e2e] status of AW %v.\n", aw1.Status.State) - Expect(aw1.Status.State).To(Equal(arbv1.AppWrapperStateRunningHoldCompletion)) - fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RuningHoldCompletion Test - Completed. Awaiting app wrapper cleanup.\n") + if aw1.Status.State == arbv1.AppWrapperStateRunningHoldCompletion { + pass = true + } + Expect(pass).To(BeTrue()) + appwrappers = append(appwrappers, aw) + fmt.Fprintf(os.Stdout, "[e2e] MCAD Deployment RunningHoldCompletion Test - Completed.\n") + }) - It("MCAD Service no RunningHoldCompletion or Complete Test", func() { - fmt.Fprintf(os.Stdout, "[e2e] MCAD Service no RunningHoldCompletion or Complete Test - Started.\n") + It("MCAD Service no RunningHoldCompletion or Complete Test", func() { + fmt.Fprintf(os.Stdout, "[e2e] MCAD Service no RunningHoldCompletion or Complete Test - Started.\n") context := initTestContext() var appwrappers []*arbv1.AppWrapper appwrappersPtr := &appwrappers @@ -740,34 +758,14 @@ var _ = Describe("AppWrapper E2E Test", func() { aw := createGenericDeploymentWithCPUAW(context, name, cpuDemand, replicas) aws = append(aws, aw) } + // Give the deployments time to create pods - time.Sleep(70 * time.Second) - uncompletedAWS := aws - // wait for pods to become ready, don't assume that they are ready in the order of submission.
- err := wait.Poll(500*time.Millisecond, 3*time.Minute, func() (done bool, err error) { - t := time.Now() - toCheckAWS := make([]*arbv1.AppWrapper, 0, len(aws)) - for _, aw := range uncompletedAWS { - err := waitAWPodsReadyEx(context, aw, 100*time.Millisecond, int(aw.Spec.SchedSpec.MinAvailable), true) - if err != nil { - toCheckAWS = append(toCheckAWS, aw) - } - } - uncompletedAWS = toCheckAWS - fmt.Fprintf(GinkgoWriter, "[e2e] Generic 50 Deployment Only - 2 pods each - There are %d app wrappers without ready pods at time %s\n", len(toCheckAWS), t.Format(time.RFC3339)) - if len(toCheckAWS) == 0 { - return true, nil - } - return false, nil - }) - if err != nil { - fmt.Fprintf(GinkgoWriter, "[e2e] Generic 50 Deployment Only - 2 pods each - There are %d app wrappers without ready pods, err = %v\n", len(uncompletedAWS), err) - for _, uaw := range uncompletedAWS { - fmt.Fprintf(GinkgoWriter, "[e2e] Generic 50 Deployment Only - 2 pods each - Uncompleted AW '%s/%s'\n", uaw.Namespace, uaw.Name) - } + // FIXME: do not assume that the pods are in running state in the order of submission. + time.Sleep(2 * time.Minute) + for i := 0; i < len(aws); i++ { + err := waitAWReadyQuiet(context, aws[i]) + Expect(err).NotTo(HaveOccurred()) } - Expect(err).Should(Succeed(), "All app wrappers should have completed") - fmt.Fprintf(os.Stdout, "[e2e] Generic 50 Deployment Only - 2 pods each - Completed, awaiting app wrapper clean up.\n") }) /* diff --git a/test/e2e/util.go b/test/e2e/util.go index 9c85d6d8c..ced2f8142 100644 --- a/test/e2e/util.go +++ b/test/e2e/util.go @@ -696,7 +696,7 @@ func awNamespacePhase(ctx *context, aw *arbv1.AppWrapper, phase []v1.NamespacePh } func waitAWPodsReady(ctx *context, aw *arbv1.AppWrapper) error { - return waitAWPodsReadyEx(ctx, aw, threeHundredSeconds, int(aw.Spec.SchedSpec.MinAvailable), false) + return waitAWPodsReadyEx(ctx, aw, int(aw.Spec.SchedSpec.MinAvailable), false) } func waitAWPodsCompleted(ctx *context, aw *arbv1.AppWrapper, timeout time.Duration) error { @@ -708,7 +708,7 @@ func waitAWPodsNotCompleted(ctx *context, aw *arbv1.AppWrapper) error { } func waitAWReadyQuiet(ctx *context, aw *arbv1.AppWrapper) error { - return waitAWPodsReadyEx(ctx, aw, threeHundredSeconds, int(aw.Spec.SchedSpec.MinAvailable), true) + return waitAWPodsReadyEx(ctx, aw, int(aw.Spec.SchedSpec.MinAvailable), true) } func waitAWAnyPodsExists(ctx *context, aw *arbv1.AppWrapper) error { @@ -736,8 +736,8 @@ func waitAWPending(ctx *context, aw *arbv1.AppWrapper) error { []v1.PodPhase{v1.PodPending}, int(aw.Spec.SchedSpec.MinAvailable), false)) } -func waitAWPodsReadyEx(ctx *context, aw *arbv1.AppWrapper, waitDuration time.Duration, taskNum int, quite bool) error { - return wait.Poll(100*time.Millisecond, waitDuration, awPodPhase(ctx, aw, +func waitAWPodsReadyEx(ctx *context, aw *arbv1.AppWrapper, taskNum int, quite bool) error { + return wait.Poll(100*time.Millisecond, threeHundredSeconds, awPodPhase(ctx, aw, []v1.PodPhase{v1.PodRunning, v1.PodSucceeded}, taskNum, quite)) } @@ -1085,32 +1085,32 @@ func createDeploymentAWwith550CPU(context *context, name string) *arbv1.AppWrapp rb := []byte(`{"apiVersion": "apps/v1", "kind": "Deployment", "metadata": { - "name": "` + name + `", + "name": "aw-deployment-2-550cpu", "namespace": "test", "labels": { - "app": "` + name + `" + "app": "aw-deployment-2-550cpu" } }, "spec": { "replicas": 2, "selector": { "matchLabels": { - "app": "` + name + `" + "app": "aw-deployment-2-550cpu" } }, "template": { "metadata": { "labels": { - "app": "` + name + `" + "app": 
"aw-deployment-2-550cpu" }, "annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" + "appwrapper.mcad.ibm.com/appwrapper-name": "aw-deployment-2-550cpu" } }, "spec": { "containers": [ { - "name": "` + name + `", + "name": "aw-deployment-2-550cpu", "image": "kicbase/echo-server:1.0", "resources": { "requests": { @@ -1490,36 +1490,36 @@ func createDeploymentAWwith426CPU(context *context, name string) *arbv1.AppWrapp rb := []byte(`{"apiVersion": "apps/v1", "kind": "Deployment", "metadata": { - "name": "` + name + `", + "name": "aw-deployment-2-426cpu", "namespace": "test", "labels": { - "app": "` + name + `" + "app": "aw-deployment-2-426cpu" } }, "spec": { "replicas": 2, "selector": { "matchLabels": { - "app": "` + name + `" + "app": "aw-deployment-2-426cpu" } }, "template": { "metadata": { "labels": { - "app": "` + name + `" + "app": "aw-deployment-2-426cpu" }, "annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" + "appwrapper.mcad.ibm.com/appwrapper-name": "aw-deployment-2-426cpu" } }, "spec": { "containers": [ { - "name": "` + name + `", + "name": "aw-deployment-2-426cpu", "image": "kicbase/echo-server:1.0", "resources": { "requests": { - "cpu": "427m" + "cpu": "426m" } }, "ports": [ @@ -2237,87 +2237,34 @@ func createGenericServiceAWWithNoStatus(context *context, name string) *arbv1.Ap func createGenericDeploymentAWWithMultipleItems(context *context, name string) *arbv1.AppWrapper { rb := []byte(`{"apiVersion": "apps/v1", - "kind": "Deployment", - "metadata": { - "name": "` + name + `-deployment-1", - "namespace": "test", - "labels": { - "app": "` + name + `-deployment-1" - } - }, - "spec": { - "replicas": 1, - "selector": { - "matchLabels": { - "app": "` + name + `-deployment-1" - } - }, - "template": { - "metadata": { - "labels": { - "app": "` + name + `-deployment-1" - }, - "annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" - } - }, - "spec": { - "initContainers": [ - { - "name": "job-init-container", - "image": "k8s.gcr.io/busybox:latest", - "command": ["sleep", "200"], - "resources": { - "requests": { - "cpu": "500m" - } - } - } - ], - "containers": [ - { - "name": "` + name + `-deployment-1", - "image": "kicbase/echo-server:1.0", - "ports": [ - { - "containerPort": 80 - } - ] - } - ] - } - } - }} `) - rb1 := []byte(`{"apiVersion": "apps/v1", - "kind": "Deployment", + "kind": "Deployment", "metadata": { - "name": "` + name + `-deployment-2", + "name": "aw-deployment-2-status", "namespace": "test", "labels": { - "app": "` + name + `-deployment-2" + "app": "aw-deployment-2-status" } }, - "spec": { "replicas": 1, "selector": { "matchLabels": { - "app": "` + name + `-deployment-2" + "app": "aw-deployment-2-status" } }, "template": { "metadata": { "labels": { - "app": "` + name + `-deployment-2" + "app": "aw-deployment-2-status" }, "annotations": { - "appwrapper.mcad.ibm.com/appwrapper-name": "` + name + `" + "appwrapper.mcad.ibm.com/appwrapper-name": "aw-deployment-2-status" } }, "spec": { "containers": [ { - "name": "` + name + `-deployment-2", + "name": "aw-deployment-2-status", "image": "kicbase/echo-server:1.0", "ports": [ { @@ -2330,6 +2277,47 @@ func createGenericDeploymentAWWithMultipleItems(context *context, name string) * } }} `) + rb1 := []byte(`{"apiVersion": "apps/v1", + "kind": "Deployment", +"metadata": { + "name": "aw-deployment-3-status", + "namespace": "test", + "labels": { + "app": "aw-deployment-3-status" + } +}, +"spec": { + "replicas": 1, + "selector": { + "matchLabels": { + "app": 
"aw-deployment-3-status" + } + }, + "template": { + "metadata": { + "labels": { + "app": "aw-deployment-3-status" + }, + "annotations": { + "appwrapper.mcad.ibm.com/appwrapper-name": "aw-deployment-3-status" + } + }, + "spec": { + "containers": [ + { + "name": "aw-deployment-3-status", + "image": "kicbase/echo-server:1.0", + "ports": [ + { + "containerPort": 80 + } + ] + } + ] + } + } +}} `) + var schedSpecMin int = 1 aw := &arbv1.AppWrapper{ @@ -2345,7 +2333,7 @@ func createGenericDeploymentAWWithMultipleItems(context *context, name string) * GenericItems: []arbv1.AppWrapperGenericResource{ { ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", name, "deployment-1"), + Name: fmt.Sprintf("%s-%s", name, "aw-deployment-2-status"), Namespace: "test", }, DesiredAvailable: 1, @@ -2356,7 +2344,7 @@ func createGenericDeploymentAWWithMultipleItems(context *context, name string) * }, { ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s", name, "deployment-2"), + Name: fmt.Sprintf("%s-%s", name, "aw-deployment-3-status"), Namespace: "test", }, DesiredAvailable: 1, diff --git a/test/yaml/0002-aw-job-quota.yaml b/test/yaml/0002-aw-job-quota.yaml deleted file mode 100644 index 36cfbb31c..000000000 --- a/test/yaml/0002-aw-job-quota.yaml +++ /dev/null @@ -1,63 +0,0 @@ -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: my-job-1 - namespace: test - labels: - quota_context: "bronze" - quota_service: "gold" -spec: - schedulingSpec: - minAvailable: 1 - resources: - GenericItems: - - replicas: 1 - completionstatus: Complete - custompodresources: - - replicas: 1 - requests: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - generictemplate: - apiVersion: batch/v1 - kind: Job - metadata: - name: my-job-1 - namespace: test - labels: - appwrapper.mcad.ibm.com: my-job-1 - spec: - parallelism: 1 - completions: 1 - template: - metadata: - name: my-job-1 - namespace: test - labels: - appwrapper.mcad.ibm.com: my-job-1 - spec: - terminationGracePeriodSeconds: 1 - restartPolicy: Never - containers: - - name: ubuntu - image: ubuntu:latest - imagePullPolicy: IfNotPresent - command: - - sh - - -c - - | - sleep 30 - resources: - requests: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi diff --git a/test/yaml/0003-aw-job-no-quota.yaml b/test/yaml/0003-aw-job-no-quota.yaml deleted file mode 100644 index a0f8452c3..000000000 --- a/test/yaml/0003-aw-job-no-quota.yaml +++ /dev/null @@ -1,60 +0,0 @@ -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: my-no-quota-job-0003 - namespace: test -spec: - schedulingSpec: - minAvailable: 1 - resources: - GenericItems: - - replicas: 1 - completionstatus: Complete - custompodresources: - - replicas: 1 - requests: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - generictemplate: - apiVersion: batch/v1 - kind: Job - metadata: - name: my-no-quota-job-0003 - namespace: test - labels: - appwrapper.mcad.ibm.com: my-no-quota-job-0003 - spec: - parallelism: 1 - completions: 1 - template: - metadata: - name: my-no-quota-job-0003 - namespace: test - labels: - appwrapper.mcad.ibm.com: my-no-quota-job-0003 - spec: - terminationGracePeriodSeconds: 1 - restartPolicy: Never - containers: - - name: ubuntu - image: ubuntu:latest - imagePullPolicy: IfNotPresent - command: - - sh - - -c - - | - sleep 30 - resources: - requests: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - 
cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi diff --git a/test/yaml/0004-aw-large-job-no-quota.yaml b/test/yaml/0004-aw-large-job-no-quota.yaml deleted file mode 100644 index 30cf44efd..000000000 --- a/test/yaml/0004-aw-large-job-no-quota.yaml +++ /dev/null @@ -1,60 +0,0 @@ -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: large-job-no-quota - namespace: test -spec: - schedulingSpec: - minAvailable: 1 - resources: - GenericItems: - - replicas: 1 - completionstatus: Complete - custompodresources: - - replicas: 1 - requests: - cpu: 3000m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 3000m - nvidia.com/gpu: 0 - memory: 300Mi - generictemplate: - apiVersion: batch/v1 - kind: Job - metadata: - name: large-job-no-quota - namespace: test - labels: - appwrapper.mcad.ibm.com: large-job-no-quota - spec: - parallelism: 1 - completions: 1 - template: - metadata: - name: large-job-no-quota - namespace: test - labels: - appwrapper.mcad.ibm.com: large-job-no-quota - spec: - terminationGracePeriodSeconds: 1 - restartPolicy: Never - containers: - - name: ubuntu - image: ubuntu:latest - imagePullPolicy: IfNotPresent - command: - - sh - - -c - - | - sleep 30 - resources: - requests: - cpu: 3000m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 3000m - nvidia.com/gpu: 0 - memory: 300Mi diff --git a/test/yaml/0005-aw-two-quota-jobs.yaml b/test/yaml/0005-aw-two-quota-jobs.yaml deleted file mode 100644 index 36d35a6b1..000000000 --- a/test/yaml/0005-aw-two-quota-jobs.yaml +++ /dev/null @@ -1,127 +0,0 @@ -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: bronze-job-0005-01 - namespace: test - labels: - quota_context: "bronze" - quota_service: "default" -spec: - schedulingSpec: - minAvailable: 1 - resources: - GenericItems: - - replicas: 1 - completionstatus: Complete - custompodresources: - - replicas: 1 - requests: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - generictemplate: - apiVersion: batch/v1 - kind: Job - metadata: - name: bronze-job-0005-01 - namespace: test - labels: - appwrapper.mcad.ibm.com: bronze-job-0005-01 - spec: - parallelism: 1 - completions: 1 - template: - metadata: - name: bronze-job-0005-01 - namespace: test - labels: - appwrapper.mcad.ibm.com: bronze-job-0005-01 - spec: - terminationGracePeriodSeconds: 1 - restartPolicy: Never - containers: - - name: ubuntu - image: ubuntu:latest - imagePullPolicy: IfNotPresent - command: - - sh - - -c - - | - sleep 30 - resources: - requests: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi ---- -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: bronze-job-0005-02 - namespace: test - labels: - quota_context: "bronze" - quota_service: "default" -spec: - schedulingSpec: - minAvailable: 1 - resources: - GenericItems: - - replicas: 1 - completionstatus: Complete - custompodresources: - - replicas: 1 - requests: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - generictemplate: - apiVersion: batch/v1 - kind: Job - metadata: - name: bronze-job-0005-02 - namespace: test - labels: - appwrapper.mcad.ibm.com: bronze-job-0005-02 - spec: - parallelism: 1 - completions: 1 - template: - metadata: - name: bronze-job-0005-02 - namespace: test - labels: - appwrapper.mcad.ibm.com: bronze-job-0005-02 - spec: - terminationGracePeriodSeconds: 1 - restartPolicy: Never - containers: - - name: ubuntu - image: ubuntu:latest - 
imagePullPolicy: IfNotPresent - command: - - sh - - -c - - | - sleep 30 - resources: - requests: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 900m - nvidia.com/gpu: 0 - memory: 300Mi diff --git a/test/yaml/0006-aw-init-containers.yaml b/test/yaml/0006-aw-init-containers.yaml deleted file mode 100644 index 4b6c7a690..000000000 --- a/test/yaml/0006-aw-init-containers.yaml +++ /dev/null @@ -1,86 +0,0 @@ -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: bronnze-init-job-0006 - namespace: test - labels: - quota_context: bronze - quota_service: gold -spec: - schedulingSpec: - minAvailable: 3 - requeuing: - timeInSeconds: 60 - growthType: exponential - maxNumRequeuings: 0 - resources: - GenericItems: - - replicas: 1 - completionstatus: Complete - custompodresources: - - replicas: 1 - requests: - cpu: 500m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 500m - nvidia.com/gpu: 0 - memory: 300Mi - generictemplate: - apiVersion: batch/v1 - kind: Job - metadata: - name: bronnze-init-job-0006 - namespace: test - labels: - appwrapper.mcad.ibm.com: bronnze-init-job-0006 - spec: - parallelism: 3 - terminationGracePeriodSeconds: 1 - restartPolicy: Never - template: - metadata: - name: bronnze-init-job-0006 - namespace: test - labels: - appwrapper.mcad.ibm.com: bronnze-init-job-0006 - spec: - terminationGracePeriodSeconds: 1 - restartPolicy: Never - initContainers: - - name: job-init-container - image: 'k8s.gcr.io/busybox:latest' - imagePullPolicy: IfNotPresent - command: - - sh - - '-c' - - | - sleep 200 - resources: - requests: - cpu: 500m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 500m - nvidia.com/gpu: 0 - memory: 300Mi - containers: - - name: job-container - image: 'k8s.gcr.io/busybox:latest' - imagePullPolicy: IfNotPresent - command: - - sh - - '-c' - - | - sleep 10 - resources: - requests: - cpu: 500m - nvidia.com/gpu: 0 - memory: 300Mi - limits: - cpu: 500m - nvidia.com/gpu: 0 - memory: 300Mi diff --git a/test/yaml/0008-aw-default.yaml b/test/yaml/0008-aw-default.yaml deleted file mode 100644 index 94653d0c0..000000000 --- a/test/yaml/0008-aw-default.yaml +++ /dev/null @@ -1,56 +0,0 @@ -apiVersion: mcad.ibm.com/v1beta1 -kind: AppWrapper -metadata: - name: defaultaw-schd-spec-with-timeout-1 - namespace: default -spec: - # schedulingSpec: - # minAvailable: 1 - # requeuing: - # timeInSeconds: 120 - # growthType: "exponential" - priority: 9 - resources: - GenericItems: - - replicas: 1 - # completionstatus: Complete - custompodresources: - - replicas: 1 - requests: - cpu: 500m - memory: 512Mi - nvidia.com/gpu: 0 - limits: - cpu: 500m - memory: 512Mi - nvidia.com/gpu: 0 - generictemplate: - apiVersion: batch/v1 - kind: Job - metadata: - namespace: default - name: defaultaw-schd-spec-with-timeout-1 - # labels: - # appwrapper.mcad.ibm.com: defaultaw-schd-spec-with-timeout-1 - spec: - parallelism: 1 - completions: 1 - template: - metadata: - namespace: default - labels: - appwrapper.mcad.ibm.com: "defaultaw-schd-spec-with-timeout-1" - spec: - containers: - - name: defaultaw-schd-spec-with-timeout-1 - image: ubuntu:latest - command: [ "/bin/bash", "-c", "--" ] - args: [ "sleep 10" ] - resources: - requests: - memory: "512Mi" - cpu: "500m" - limits: - memory: "512Mi" - cpu: "500m" - restartPolicy: Never