Skip to content

Commit 6def23c

Browse files
committed
Fix error message for long-waiting operations
Get HTTP responses together with error, so that we could get full error messages. It also fixes some edge cases for success responses.
1 parent 7f77136 commit 6def23c

File tree

5 files changed

+101
-15
lines changed

5 files changed

+101
-15
lines changed

cluster-autoscaler/cloudprovider/azure/azure_agent_pool.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package azure
1818

1919
import (
20+
"errors"
2021
"fmt"
2122
"math/rand"
2223
"sort"
@@ -209,15 +210,21 @@ func (as *AgentPool) IncreaseSize(delta int) error {
209210
}
210211
ctx, cancel := getContextWithCancel()
211212
defer cancel()
212-
_, err = as.manager.azClient.deploymentsClient.CreateOrUpdate(ctx, as.manager.config.ResourceGroup, newDeploymentName, newDeployment)
213213
klog.V(3).Infof("Waiting for deploymentsClient.CreateOrUpdate(%s, %s, %v)", as.manager.config.ResourceGroup, newDeploymentName, newDeployment)
214-
if err != nil {
215-
return err
214+
resp, err := as.manager.azClient.deploymentsClient.CreateOrUpdate(ctx, as.manager.config.ResourceGroup, newDeploymentName, newDeployment)
215+
if isSuccessHTTPResponse(resp) {
216+
klog.V(3).Infof("deploymentsClient.CreateOrUpdate(%s, %s, %v) success", as.manager.config.ResourceGroup, newDeploymentName, newDeployment)
217+
as.curSize = int64(expectedSize)
218+
as.lastRefresh = time.Now()
219+
return nil
216220
}
217221

218-
as.curSize = int64(expectedSize)
219-
as.lastRefresh = time.Now()
220-
return err
222+
errMessage := err.Error()
223+
if resp != nil {
224+
errMessage = fmt.Sprintf("%s with HTTP status %s", err.Error(), resp.Status)
225+
}
226+
klog.Errorf("deploymentsClient.CreateOrUpdate for deployment %q failed: %s", newDeploymentName, errMessage)
227+
return errors.New(errMessage)
221228
}
222229

223230
// GetVirtualMachines returns list of nodes for the given agent pool.

cluster-autoscaler/cloudprovider/azure/azure_container_service_pool.go

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package azure
1818

1919
import (
20+
"errors"
2021
"fmt"
2122
"strings"
2223

@@ -193,7 +194,20 @@ func (agentPool *ContainerServiceAgentPool) setAKSNodeCount(count int) error {
193194
klog.Errorf("Failed to update AKS cluster (%q): %v", agentPool.clusterName, err)
194195
return err
195196
}
196-
return future.WaitForCompletionRef(updateCtx, aksClient.Client)
197+
198+
err = future.WaitForCompletionRef(updateCtx, aksClient.Client)
199+
resp := future.Response()
200+
if isSuccessHTTPResponse(resp) {
201+
klog.V(3).Infof("aksClient.CreateOrUpdate for aks cluster %q success", agentPool.clusterName)
202+
return nil
203+
}
204+
205+
errMessage := err.Error()
206+
if resp != nil {
207+
errMessage = fmt.Sprintf("%s with HTTP status %s", err.Error(), resp.Status)
208+
}
209+
klog.Errorf("aksClient.CreateOrUpdate for aks cluster %q failed: %s", agentPool.clusterName, errMessage)
210+
return errors.New(errMessage)
197211
}
198212

199213
// setACSNodeCount sets node count for ACS agent pool.
@@ -226,7 +240,20 @@ func (agentPool *ContainerServiceAgentPool) setACSNodeCount(count int) error {
226240
klog.Errorf("Failed to update ACS cluster (%q): %v", agentPool.clusterName, err)
227241
return err
228242
}
229-
return future.WaitForCompletionRef(updateCtx, acsClient.Client)
243+
244+
err = future.WaitForCompletionRef(updateCtx, acsClient.Client)
245+
resp := future.Response()
246+
if isSuccessHTTPResponse(resp) {
247+
klog.V(3).Infof("acsClient.CreateOrUpdate for acs cluster %q success", agentPool.clusterName)
248+
return nil
249+
}
250+
251+
errMessage := err.Error()
252+
if resp != nil {
253+
errMessage = fmt.Sprintf("%s with HTTP status %s", err.Error(), resp.Status)
254+
}
255+
klog.Errorf("acsClient.CreateOrUpdate for acs cluster %q failed: %s", agentPool.clusterName, errMessage)
256+
return errors.New(errMessage)
230257
}
231258

232259
//GetNodeCount returns the count of nodes from the managed agent pool profile

cluster-autoscaler/cloudprovider/azure/azure_scale_set.go

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package azure
1818

1919
import (
20+
"errors"
2021
"fmt"
2122
"math/rand"
2223
"strings"
@@ -153,15 +154,21 @@ func (scaleSet *ScaleSet) SetScaleSetSize(size int64) error {
153154
op.VirtualMachineScaleSetProperties.ProvisioningState = nil
154155
updateCtx, updateCancel := getContextWithCancel()
155156
defer updateCancel()
156-
_, err = scaleSet.manager.azClient.virtualMachineScaleSetsClient.CreateOrUpdate(updateCtx, resourceGroup, scaleSet.Name, op)
157-
if err != nil {
158-
klog.Errorf("virtualMachineScaleSetsClient.CreateOrUpdate for scale set %q failed: %v", scaleSet.Name, err)
159-
return err
157+
klog.V(3).Infof("Waiting for virtualMachineScaleSetsClient.CreateOrUpdate(%s)", scaleSet.Name)
158+
resp, err := scaleSet.manager.azClient.virtualMachineScaleSetsClient.CreateOrUpdate(updateCtx, resourceGroup, scaleSet.Name, op)
159+
if isSuccessHTTPResponse(resp) {
160+
klog.V(3).Infof("virtualMachineScaleSetsClient.CreateOrUpdate(%s) success", scaleSet.Name)
161+
scaleSet.curSize = size
162+
scaleSet.lastRefresh = time.Now()
163+
return nil
160164
}
161165

162-
scaleSet.curSize = size
163-
scaleSet.lastRefresh = time.Now()
164-
return nil
166+
errMessage := err.Error()
167+
if resp != nil {
168+
errMessage = fmt.Sprintf("%s with HTTP status %s", err.Error(), resp.Status)
169+
}
170+
klog.Errorf("virtualMachineScaleSetsClient.CreateOrUpdate for scale set %q failed: %s", scaleSet.Name, errMessage)
171+
return errors.New(errMessage)
165172
}
166173

167174
// TargetSize returns the current TARGET size of the node group. It is possible that the

cluster-autoscaler/cloudprovider/azure/azure_util.go

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -609,3 +609,17 @@ func checkResourceExistsFromError(err error) (bool, error) {
609609
}
610610
return false, v
611611
}
612+
613+
// isSuccessHTTPResponse determines if the response from an HTTP request suggests success
614+
func isSuccessHTTPResponse(resp *http.Response) bool {
615+
if resp == nil {
616+
return false
617+
}
618+
619+
// HTTP 2xx suggests a successful response
620+
if 199 < resp.StatusCode && resp.StatusCode < 300 {
621+
return true
622+
}
623+
624+
return false
625+
}

cluster-autoscaler/cloudprovider/azure/azure_util_test.go

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package azure
1818

1919
import (
2020
"fmt"
21+
"net/http"
2122
"testing"
2223

2324
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-10-01/compute"
@@ -124,3 +125,33 @@ func TestGetVMNameIndexWindows(t *testing.T) {
124125
t.Fatalf("unexpected error: %s", err)
125126
}
126127
}
128+
129+
func TestIsSuccessResponse(t *testing.T) {
130+
tests := []struct {
131+
code int
132+
expected bool
133+
}{
134+
{
135+
code: http.StatusNotFound,
136+
expected: false,
137+
},
138+
{
139+
code: http.StatusInternalServerError,
140+
expected: false,
141+
},
142+
{
143+
code: http.StatusOK,
144+
expected: true,
145+
},
146+
}
147+
148+
for _, test := range tests {
149+
resp := http.Response{
150+
StatusCode: test.code,
151+
}
152+
res := isSuccessHTTPResponse(&resp)
153+
if res != test.expected {
154+
t.Errorf("expected: %v, saw: %v", test.expected, res)
155+
}
156+
}
157+
}

0 commit comments

Comments
 (0)