Skip to content
This repository was archived by the owner on Nov 16, 2023. It is now read-only.

Commit c4be168

Browse files
authored
Enrich PodSpecError to early fail Pod (#52)
1 parent 7789e3e commit c4be168

File tree

5 files changed

+19
-12
lines changed

5 files changed

+19
-12
lines changed

pkg/apis/frameworkcontroller/v1/completion.go

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -68,7 +68,7 @@ const (
6868
CompletionCodeConfigMapCreationTimeout CompletionCode = -110
6969
CompletionCodePodCreationTimeout CompletionCode = -111
7070
// -2XX: Permanent Error
71-
CompletionCodePodSpecInvalid CompletionCode = -200
71+
CompletionCodePodSpecPermanentError CompletionCode = -200
7272
CompletionCodeStopFrameworkRequested CompletionCode = -210
7373
CompletionCodeFrameworkAttemptCompletion CompletionCode = -220
7474
// -3XX: Unknown Error
@@ -163,8 +163,8 @@ func initCompletionCodeInfos() {
163163
[]CompletionTypeAttribute{CompletionTypeAttributeTransient}},
164164
},
165165
{
166-
Code: CompletionCodePodSpecInvalid.Ptr(),
167-
Phrase: "PodSpecInvalid",
166+
Code: CompletionCodePodSpecPermanentError.Ptr(),
167+
Phrase: "PodSpecPermanentError",
168168
Type: CompletionType{CompletionTypeNameFailed,
169169
[]CompletionTypeAttribute{CompletionTypeAttributePermanent}},
170170
},

pkg/apis/frameworkcontroller/v1/funcs.go

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -537,8 +537,7 @@ func (rp RetryPolicySpec) ShouldRetry(
537537
ct := cs.Type
538538

539539
// 0. Built-in Always-on RetryPolicy
540-
if cs.Code == CompletionCodePodSpecInvalid ||
541-
cs.Code == CompletionCodeStopFrameworkRequested ||
540+
if cs.Code == CompletionCodeStopFrameworkRequested ||
542541
cs.Code == CompletionCodeFrameworkAttemptCompletion {
543542
return RetryDecision{false, true, 0, fmt.Sprintf(
544543
"CompletionCode is %v, %v", cs.Code, cs.Phrase)}

pkg/apis/frameworkcontroller/v1/types.go

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -134,8 +134,7 @@ const (
134134
// complete a single Task in the TaskRole.
135135
//
136136
// Usage:
137-
// If the Pod Spec is invalid or
138-
// the ExecutionType is ExecutionStop or
137+
// If the ExecutionType is ExecutionStop or
139138
// the Task's FrameworkAttempt is completing,
140139
// will not retry.
141140
//

pkg/controller/controller.go

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1146,7 +1146,7 @@ func (c *FrameworkController) createConfigMap(
11461146

11471147
remoteCM, createErr := c.kClient.CoreV1().ConfigMaps(f.Namespace).Create(cm)
11481148
if createErr != nil {
1149-
if apiErrors.IsConflict(createErr) {
1149+
if apiErrors.IsAlreadyExists(createErr) {
11501150
// Best effort to judge if conflict with a not controlled object.
11511151
localCM, getErr := c.cmLister.ConfigMaps(f.Namespace).Get(cm.Name)
11521152
if getErr == nil && !meta.IsControlledBy(localCM, f) {
@@ -1431,9 +1431,9 @@ func (c *FrameworkController) syncTaskState(
14311431
pod, err = c.createPod(f, cm, taskRoleName, taskIndex)
14321432
if err != nil {
14331433
apiErr := errorWrap.Cause(err)
1434-
if apiErrors.IsInvalid(apiErr) {
1434+
if internal.IsPodSpecPermanentError(apiErr) {
14351435
// Should be Framework Error instead of Platform Transient Error.
1436-
diag := fmt.Sprintf("%v", apiErr)
1436+
diag := fmt.Sprintf("Failed to create Pod: %v", common.ToJson(apiErr))
14371437
klog.Info(logPfx + diag)
14381438

14391439
// Ensure pod is deleted in remote to avoid managed pod leak after
@@ -1444,7 +1444,7 @@ func (c *FrameworkController) syncTaskState(
14441444
}
14451445

14461446
c.completeTaskAttempt(f, taskRoleName, taskIndex, true,
1447-
ci.CompletionCodePodSpecInvalid.
1447+
ci.CompletionCodePodSpecPermanentError.
14481448
NewTaskAttemptCompletionStatus(diag, nil))
14491449
return nil
14501450
} else {
@@ -1702,7 +1702,7 @@ func (c *FrameworkController) createPod(
17021702

17031703
remotePod, createErr := c.kClient.CoreV1().Pods(f.Namespace).Create(pod)
17041704
if createErr != nil {
1705-
if apiErrors.IsConflict(createErr) {
1705+
if apiErrors.IsAlreadyExists(createErr) {
17061706
// Best effort to judge if conflict with a not controlled object.
17071707
localPod, getErr := c.podLister.Pods(f.Namespace).Get(pod.Name)
17081708
if getErr == nil && !meta.IsControlledBy(localPod, cm) {

pkg/internal/utils.go

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ import (
3838
"k8s.io/client-go/tools/cache"
3939
"k8s.io/klog"
4040
"reflect"
41+
"strings"
4142
"time"
4243
)
4344

@@ -221,3 +222,11 @@ func GetPodDeletionStartTime(pod *core.Pod) *meta.Time {
221222
}
222223
return common.PtrTime(meta.NewTime(pod.DeletionTimestamp.Add(-gracePeriod)))
223224
}
225+
226+
func IsPodSpecPermanentError(apiErr error) bool {
227+
return apiErrors.IsBadRequest(apiErr) ||
228+
apiErrors.IsInvalid(apiErr) ||
229+
apiErrors.IsRequestEntityTooLargeError(apiErr) ||
230+
(apiErrors.IsForbidden(apiErr) &&
231+
!strings.Contains(apiErr.Error(), "exceeded quota"))
232+
}

0 commit comments

Comments
 (0)