Skip to content

Commit 083393f

Browse files
authored
xds/resolver: fix resource deletion (#4143)
1 parent 85e55dc commit 083393f

File tree

3 files changed

+247
-50
lines changed

3 files changed

+247
-50
lines changed

xds/internal/resolver/serviceconfig.go

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@ type configSelector struct {
113113
var errNoMatchedRouteFound = status.Errorf(codes.Unavailable, "no matched route was found")
114114

115115
func (cs *configSelector) SelectConfig(rpcInfo iresolver.RPCInfo) (*iresolver.RPCConfig, error) {
116+
if cs == nil {
117+
return nil, status.Errorf(codes.Unavailable, "no valid clusters")
118+
}
116119
var rt *route
117120
// Loop through routes in order and select first match.
118121
for _, r := range cs.routes {
@@ -157,17 +160,8 @@ func (cs *configSelector) SelectConfig(rpcInfo iresolver.RPCInfo) (*iresolver.RP
157160
return config, nil
158161
}
159162

160-
// incRefs increments refs of all clusters referenced by this config selector.
161-
func (cs *configSelector) incRefs() {
162-
// Loops over cs.clusters, but these are pointers to entries in
163-
// activeClusters.
164-
for _, ci := range cs.clusters {
165-
atomic.AddInt32(&ci.refCount, 1)
166-
}
167-
}
168-
169-
// decRefs decrements refs of all clusters referenced by this config selector.
170-
func (cs *configSelector) decRefs() {
163+
// stop decrements refs of all clusters referenced by this config selector.
164+
func (cs *configSelector) stop() {
171165
// The resolver's old configSelector may be nil. Handle that here.
172166
if cs == nil {
173167
return
@@ -234,6 +228,13 @@ func (r *xdsResolver) newConfigSelector(su serviceUpdate) (*configSelector, erro
234228
}
235229
}
236230

231+
// Account for this config selector's clusters. Do this after no further
232+
// errors may occur. Note: cs.clusters are pointers to entries in
233+
// activeClusters.
234+
for _, ci := range cs.clusters {
235+
atomic.AddInt32(&ci.refCount, 1)
236+
}
237+
237238
return cs, nil
238239
}
239240

xds/internal/resolver/xds_resolver.go

Lines changed: 63 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,43 @@ type xdsResolver struct {
144144
curConfigSelector *configSelector
145145
}
146146

147+
// sendNewServiceConfig prunes active clusters, generates a new service config
148+
// based on the current set of active clusters, and sends an update to the
149+
// channel with that service config and the provided config selector. Returns
150+
// false if an error occurs while generating the service config and the update
151+
// cannot be sent.
//
// cs may be nil. If cs is nil and no active clusters remain after pruning, an
// empty ("{}") service config is sent without a config selector and true is
// returned. If cs is nil but clusters remain, the normal service config is
// sent together with the nil selector.
152+
func (r *xdsResolver) sendNewServiceConfig(cs *configSelector) bool {
153+
// Delete entries from r.activeClusters with zero references;
154+
// otherwise serviceConfigJSON will generate a config including
155+
// them.
156+
r.pruneActiveClusters()
157+
158+
if cs == nil && len(r.activeClusters) == 0 {
159+
// There are no clusters and we are sending a failing configSelector.
160+
// Send an empty config, which picks pick-first, with no address, and
161+
// puts the ClientConn into transient failure.
162+
r.cc.UpdateState(resolver.State{ServiceConfig: r.cc.ParseServiceConfig("{}")})
163+
return true
164+
}
165+
166+
// Produce the service config.
167+
sc, err := serviceConfigJSON(r.activeClusters)
168+
if err != nil {
169+
// JSON marshal error; should never happen.
// The error is both logged and reported to the ClientConn; no state
// update is sent in this case.
170+
r.logger.Errorf("%v", err)
171+
r.cc.ReportError(err)
172+
return false
173+
}
174+
r.logger.Infof("Received update on resource %v from xds-client %p, generated service config: %v", r.target.Endpoint, r.client, sc)
175+
176+
// Send the update to the ClientConn.
// cs may be a nil *configSelector here; SelectConfig checks for a nil
// receiver and returns codes.Unavailable, so the clusters stay in the
// service config while config selection on this selector fails.
177+
state := iresolver.SetConfigSelector(resolver.State{
178+
ServiceConfig: r.cc.ParseServiceConfig(sc),
179+
}, cs)
180+
r.cc.UpdateState(state)
181+
return true
182+
}
183+
147184
// run is a long running goroutine which blocks on receiving service updates
148185
// and passes it on the ClientConn.
149186
func (r *xdsResolver) run() {
@@ -155,15 +192,15 @@ func (r *xdsResolver) run() {
155192
if update.err != nil {
156193
r.logger.Warningf("Watch error on resource %v from xds-client %p, %v", r.target.Endpoint, r.client, update.err)
157194
if xdsclient.ErrType(update.err) == xdsclient.ErrorTypeResourceNotFound {
158-
// If error is resource-not-found, it means the LDS resource
159-
// was removed. Send an empty service config, which picks
160-
// pick-first, with no address, and puts the ClientConn into
161-
// transient failure..
162-
r.cc.UpdateState(resolver.State{
163-
ServiceConfig: r.cc.ParseServiceConfig("{}"),
164-
})
165-
// Dereference the active config selector, if one exists.
166-
r.curConfigSelector.decRefs()
195+
// If error is resource-not-found, it means the LDS
196+
// resource was removed. Ultimately send an empty service
197+
// config, which picks pick-first, with no address, and
198+
// puts the ClientConn into transient failure. Before we
199+
// can do that, we may need to send a normal service config
200+
// along with an erroring (nil) config selector.
201+
r.sendNewServiceConfig(nil)
202+
// Stop and dereference the active config selector, if one exists.
203+
r.curConfigSelector.stop()
167204
r.curConfigSelector = nil
168205
continue
169206
}
@@ -173,43 +210,30 @@ func (r *xdsResolver) run() {
173210
r.cc.ReportError(update.err)
174211
continue
175212
}
176-
var cs *configSelector
177-
if !update.emptyUpdate {
178-
// Create the config selector for this update.
179-
var err error
180-
if cs, err = r.newConfigSelector(update.su); err != nil {
181-
r.logger.Warningf("Error parsing update on resource %v from xds-client %p: %v", r.target.Endpoint, r.client, err)
182-
r.cc.ReportError(err)
183-
continue
184-
}
185-
} else {
186-
// Empty update; use the existing config selector.
187-
cs = r.curConfigSelector
213+
if update.emptyUpdate {
214+
r.sendNewServiceConfig(r.curConfigSelector)
215+
continue
188216
}
189-
// Account for this config selector's clusters.
190-
cs.incRefs()
191-
// Delete entries from r.activeClusters with zero references;
192-
// otherwise serviceConfigJSON will generate a config including
193-
// them.
194-
r.pruneActiveClusters()
195-
// Produce the service config.
196-
sc, err := serviceConfigJSON(r.activeClusters)
217+
218+
// Create the config selector for this update.
219+
cs, err := r.newConfigSelector(update.su)
197220
if err != nil {
198-
// JSON marshal error; should never happen.
199-
r.logger.Errorf("%v", err)
221+
r.logger.Warningf("Error parsing update on resource %v from xds-client %p: %v", r.target.Endpoint, r.client, err)
200222
r.cc.ReportError(err)
201-
cs.decRefs()
202223
continue
203224
}
204-
r.logger.Infof("Received update on resource %v from xds-client %p, generated service config: %v", r.target.Endpoint, r.client, sc)
205-
// Send the update to the ClientConn.
206-
state := iresolver.SetConfigSelector(resolver.State{
207-
ServiceConfig: r.cc.ParseServiceConfig(sc),
208-
}, cs)
209-
r.cc.UpdateState(state)
225+
226+
if !r.sendNewServiceConfig(cs) {
227+
// JSON error creating the service config (unexpected); erase
228+
// this config selector and ignore this update, continuing with
229+
// the previous config selector.
230+
cs.stop()
231+
continue
232+
}
233+
210234
// Decrement references to the old config selector and assign the
211235
// new one as the current one.
212-
r.curConfigSelector.decRefs()
236+
r.curConfigSelector.stop()
213237
r.curConfigSelector = cs
214238
}
215239
}

xds/internal/resolver/xds_resolver_test.go

Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"time"
2727

2828
"github.com/google/go-cmp/cmp"
29+
"google.golang.org/grpc/codes"
2930
"google.golang.org/grpc/credentials/insecure"
3031
xdscreds "google.golang.org/grpc/credentials/xds"
3132
"google.golang.org/grpc/internal"
@@ -36,6 +37,7 @@ import (
3637
"google.golang.org/grpc/internal/wrr"
3738
"google.golang.org/grpc/resolver"
3839
"google.golang.org/grpc/serviceconfig"
40+
"google.golang.org/grpc/status"
3941
_ "google.golang.org/grpc/xds/internal/balancer/cdsbalancer" // To parse LB config
4042
"google.golang.org/grpc/xds/internal/balancer/clustermanager"
4143
"google.golang.org/grpc/xds/internal/client"
@@ -435,6 +437,176 @@ func (s) TestXDSResolverGoodServiceUpdate(t *testing.T) {
435437
}
436438
}
437439

440+
// TestXDSResolverRemovedWithRPCs covers resource removal while an "RPC" is
// still uncommitted: it obtains a config selector from a good update, selects
// a config, delivers a resource-not-found error, waits for the resulting
441+
// state update, and only then calls OnCommitted on the earlier selection. The
// test passes if none of these steps fails or panics.
442+
func (s) TestXDSResolverRemovedWithRPCs(t *testing.T) {
443+
xdsC := fakeclient.NewClient()
444+
xdsR, tcc, cancel := testSetup(t, setupOpts{
445+
xdsClientFunc: func() (xdsClientInterface, error) { return xdsC, nil },
446+
})
447+
defer cancel()
448+
defer xdsR.Close()
449+
450+
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
451+
defer cancel()
452+
waitForWatchListener(ctx, t, xdsC, targetStr)
453+
xdsC.InvokeWatchListenerCallback(xdsclient.ListenerUpdate{RouteConfigName: routeStr}, nil)
454+
waitForWatchRouteConfig(ctx, t, xdsC, routeStr)
455+
456+
// Invoke the watchAPI callback with a good service update and wait for the
457+
// UpdateState method to be called on the ClientConn.
458+
xdsC.InvokeWatchRouteConfigCallback(xdsclient.RouteConfigUpdate{
459+
VirtualHosts: []*xdsclient.VirtualHost{
460+
{
461+
Domains: []string{targetStr},
462+
Routes: []*client.Route{{Prefix: newStringP(""), Action: map[string]uint32{"test-cluster-1": 1}}},
463+
},
464+
},
465+
}, nil)
466+
467+
gotState, err := tcc.stateCh.Receive(ctx)
468+
if err != nil {
469+
t.Fatalf("ClientConn.UpdateState returned error: %v", err)
470+
}
471+
rState := gotState.(resolver.State)
472+
if err := rState.ServiceConfig.Err; err != nil {
473+
t.Fatalf("ClientConn.UpdateState received error in service config: %v", rState.ServiceConfig.Err)
474+
}
475+
476+
// "Make an RPC" by invoking the config selector.
477+
cs := iresolver.GetConfigSelector(rState)
478+
if cs == nil {
479+
t.Fatalf("received nil config selector")
480+
}
481+
482+
res, err := cs.SelectConfig(iresolver.RPCInfo{Context: context.Background()})
483+
if err != nil {
484+
t.Fatalf("Unexpected error from cs.SelectConfig(_): %v", err)
485+
}
486+
487+
// Delete the resource by delivering a resource-not-found error on the
// route config watch, while the selection above is still uncommitted.
488+
suErr := xdsclient.NewErrorf(xdsclient.ErrorTypeResourceNotFound, "resource removed error")
489+
xdsC.InvokeWatchRouteConfigCallback(xdsclient.RouteConfigUpdate{}, suErr)
490+
// Only the arrival of a state update is checked here; its contents are
// not inspected.
491+
if _, err = tcc.stateCh.Receive(ctx); err != nil {
492+
t.Fatalf("ClientConn.UpdateState returned error: %v", err)
493+
}
494+
495+
// "Finish the RPC"; this could cause a panic if the resolver doesn't
496+
// handle it correctly.
497+
res.OnCommitted()
498+
}
499+
500+
// TestXDSResolverRemovedResource tests for proper behavior after a resource is
500+
// removed.
//
// It expects three state updates in order: (1) after a good route update, a
// cluster-manager service config containing test-cluster-1 and a working
// config selector; (2) after a resource-not-found error, the same service
// config with a config selector whose SelectConfig fails with
// codes.Unavailable; (3) an empty ("{}") service config.
501+
func (s) TestXDSResolverRemovedResource(t *testing.T) {
502+
xdsC := fakeclient.NewClient()
503+
xdsR, tcc, cancel := testSetup(t, setupOpts{
504+
xdsClientFunc: func() (xdsClientInterface, error) { return xdsC, nil },
505+
})
506+
defer cancel()
507+
defer xdsR.Close()
508+
509+
ctx, cancel := context.WithTimeout(context.Background(), defaultTestTimeout)
510+
defer cancel()
511+
waitForWatchListener(ctx, t, xdsC, targetStr)
512+
xdsC.InvokeWatchListenerCallback(xdsclient.ListenerUpdate{RouteConfigName: routeStr}, nil)
513+
waitForWatchRouteConfig(ctx, t, xdsC, routeStr)
514+
515+
// Invoke the watchAPI callback with a good service update and wait for the
516+
// UpdateState method to be called on the ClientConn.
517+
xdsC.InvokeWatchRouteConfigCallback(xdsclient.RouteConfigUpdate{
518+
VirtualHosts: []*xdsclient.VirtualHost{
519+
{
520+
Domains: []string{targetStr},
521+
Routes: []*client.Route{{Prefix: newStringP(""), Action: map[string]uint32{"test-cluster-1": 1}}},
522+
},
523+
},
524+
}, nil)
// Expected service config for the single cluster routed to above.
525+
wantJSON := `{"loadBalancingConfig":[{
526+
"xds_cluster_manager_experimental":{
527+
"children":{
528+
"test-cluster-1":{
529+
"childPolicy":[{"cds_experimental":{"cluster":"test-cluster-1"}}]
530+
}
531+
}
532+
}}]}`
533+
wantSCParsed := internal.ParseServiceConfigForTesting.(func(string) *serviceconfig.ParseResult)(wantJSON)
534+
535+
gotState, err := tcc.stateCh.Receive(ctx)
536+
if err != nil {
537+
t.Fatalf("ClientConn.UpdateState returned error: %v", err)
538+
}
539+
rState := gotState.(resolver.State)
540+
if err := rState.ServiceConfig.Err; err != nil {
541+
t.Fatalf("ClientConn.UpdateState received error in service config: %v", rState.ServiceConfig.Err)
542+
}
543+
if !internal.EqualServiceConfigForTesting(rState.ServiceConfig.Config, wantSCParsed.Config) {
544+
t.Errorf("ClientConn.UpdateState received different service config")
// NOTE(review): cmp.Diff against nil appears to be used only to print
// each config; it is not a got-vs-want diff.
545+
t.Error("got: ", cmp.Diff(nil, rState.ServiceConfig.Config))
546+
t.Error("want: ", cmp.Diff(nil, wantSCParsed.Config))
547+
}
548+
549+
// "Make an RPC" by invoking the config selector.
550+
cs := iresolver.GetConfigSelector(rState)
551+
if cs == nil {
552+
t.Fatalf("received nil config selector")
553+
}
554+
555+
res, err := cs.SelectConfig(iresolver.RPCInfo{Context: context.Background()})
556+
if err != nil {
557+
t.Fatalf("Unexpected error from cs.SelectConfig(_): %v", err)
558+
}
559+
560+
// "Finish the RPC"; this could cause a panic if the resolver doesn't
561+
// handle it correctly.
562+
res.OnCommitted()
563+
564+
// Delete the resource. The channel should receive a service config with the
565+
// original cluster but with an erroring config selector.
566+
suErr := xdsclient.NewErrorf(xdsclient.ErrorTypeResourceNotFound, "resource removed error")
567+
xdsC.InvokeWatchRouteConfigCallback(xdsclient.RouteConfigUpdate{}, suErr)
568+
569+
if gotState, err = tcc.stateCh.Receive(ctx); err != nil {
570+
t.Fatalf("ClientConn.UpdateState returned error: %v", err)
571+
}
572+
rState = gotState.(resolver.State)
573+
if err := rState.ServiceConfig.Err; err != nil {
574+
t.Fatalf("ClientConn.UpdateState received error in service config: %v", rState.ServiceConfig.Err)
575+
}
576+
if !internal.EqualServiceConfigForTesting(rState.ServiceConfig.Config, wantSCParsed.Config) {
577+
t.Errorf("ClientConn.UpdateState received different service config")
578+
t.Error("got: ", cmp.Diff(nil, rState.ServiceConfig.Config))
579+
t.Error("want: ", cmp.Diff(nil, wantSCParsed.Config))
580+
}
581+
582+
// "Make another RPC" by invoking the config selector.
// The selector must be present (non-nil) in the state, yet selecting a
// config through it must fail with codes.Unavailable.
583+
cs = iresolver.GetConfigSelector(rState)
584+
if cs == nil {
585+
t.Fatalf("received nil config selector")
586+
}
587+
588+
res, err = cs.SelectConfig(iresolver.RPCInfo{Context: context.Background()})
589+
if err == nil || status.Code(err) != codes.Unavailable {
590+
t.Fatalf("Expected UNAVAILABLE error from cs.SelectConfig(_); got %v, %v", res, err)
591+
}
592+
593+
// In the meantime, an empty ServiceConfig update should have been sent.
// NOTE(review): presumably triggered once the old config selector's
// cluster references are released — confirm against stop().
594+
if gotState, err = tcc.stateCh.Receive(ctx); err != nil {
595+
t.Fatalf("ClientConn.UpdateState returned error: %v", err)
596+
}
597+
rState = gotState.(resolver.State)
598+
if err := rState.ServiceConfig.Err; err != nil {
599+
t.Fatalf("ClientConn.UpdateState received error in service config: %v", rState.ServiceConfig.Err)
600+
}
601+
wantSCParsed = internal.ParseServiceConfigForTesting.(func(string) *serviceconfig.ParseResult)("{}")
602+
if !internal.EqualServiceConfigForTesting(rState.ServiceConfig.Config, wantSCParsed.Config) {
603+
t.Errorf("ClientConn.UpdateState received different service config")
604+
t.Error("got: ", cmp.Diff(nil, rState.ServiceConfig.Config))
605+
t.Error("want: ", cmp.Diff(nil, wantSCParsed.Config))
606+
}
607+
}
609+
438610
func (s) TestXDSResolverWRR(t *testing.T) {
439611
xdsC := fakeclient.NewClient()
440612
xdsR, tcc, cancel := testSetup(t, setupOpts{

0 commit comments

Comments
 (0)