@@ -19,9 +19,12 @@ package workloads
19
19
import (
20
20
"time"
21
21
22
+ kcore "k8s.io/api/core/v1"
22
23
kmeta "k8s.io/apimachinery/pkg/apis/meta/v1"
23
24
24
25
"github.com/cortexlabs/cortex/pkg/lib/errors"
26
+ "github.com/cortexlabs/cortex/pkg/lib/k8s"
27
+ "github.com/cortexlabs/cortex/pkg/lib/sets/strset"
25
28
"github.com/cortexlabs/cortex/pkg/operator/config"
26
29
)
27
30
@@ -60,6 +63,7 @@ func runCron() {
60
63
"workloadType" : workloadTypeAPI ,
61
64
"userFacing" : "true" ,
62
65
})
66
+
63
67
if err != nil {
64
68
config .Telemetry .ReportError (err )
65
69
errors .PrintError (err )
@@ -73,11 +77,14 @@ func runCron() {
73
77
failedPods , err := config .Kubernetes .ListPods (& kmeta.ListOptions {
74
78
FieldSelector : "status.phase=Failed" ,
75
79
})
80
+
76
81
if err != nil {
77
82
config .Telemetry .ReportError (err )
78
83
errors .PrintError (err )
79
84
}
80
85
86
+ deleteEvictedPods (failedPods )
87
+
81
88
if err := updateDataWorkloadErrors (failedPods ); err != nil {
82
89
config .Telemetry .ReportError (err )
83
90
errors .PrintError (err )
@@ -93,3 +100,33 @@ func reportAndRecover(strs ...string) error {
93
100
}
94
101
return nil
95
102
}
103
+
104
+ func deleteEvictedPods (failedPods []kcore.Pod ) {
105
+ evictedPods := []kcore.Pod {}
106
+ for _ , pod := range failedPods {
107
+ if pod .Status .Reason == k8s .ReasonEvicted {
108
+ evictedPods = append (evictedPods , pod )
109
+ }
110
+ }
111
+
112
+ if len (evictedPods ) > 0 {
113
+ savedEvictedPods := map [string ]kcore.Pod {}
114
+ currentWorkloadIDs := strset .New ()
115
+ for _ , ctx := range CurrentContexts () {
116
+ currentWorkloadIDs .Merge (ctx .ComputedResourceWorkloadIDs ())
117
+ }
118
+
119
+ for _ , pod := range evictedPods {
120
+ if currentWorkloadIDs .Has (pod .Labels ["workloadID" ]) {
121
+ if _ , ok := savedEvictedPods [pod .Labels ["resourceID" ]]; ! ok {
122
+ savedEvictedPods [pod .Labels ["resourceID" ]] = pod
123
+ continue
124
+ }
125
+ }
126
+ _ , err := config .Kubernetes .DeletePod (pod .Name )
127
+ if err != nil {
128
+ errors .PrintError (err )
129
+ }
130
+ }
131
+ }
132
+ }
0 commit comments