Skip to content

Commit 21ace57

Browse files
committed
Consider AZ awareness disabled if only 1 zone is present in list rules
Signed-off-by: Emmanuel Lodovice <[email protected]>
1 parent 5fbda3c commit 21ace57

File tree

3 files changed

+44
-1
lines changed

3 files changed

+44
-1
lines changed

pkg/ruler/ruler_ring.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,10 @@ func GetReplicationSetForListRule(r ring.ReadRing, cfg *RingConfig) (ring.Replic
144144
// to 0, and then we update them whether zone-awareness is enabled or not.
145145
maxErrors := 0
146146
maxUnavailableZones := 0
147-
if cfg.ZoneAwarenessEnabled {
147+
// Because ring's Get method returns a number of ruler equal to the replication factor even if there is only 1 zone
148+
// and ZoneAwarenessEnabled, we can consider that ZoneAwarenessEnabled is disabled if there is only 1 zone since
149+
// rules are still replicated to rulers in the same zone.
150+
if cfg.ZoneAwarenessEnabled && len(ringZones) > 1 {
148151
numReplicatedZones := min(len(ringZones), r.ReplicationFactor())
149152
// Given that quorum is not required, we only need at least one of the zone to be healthy to succeed. But we
150153
// also need to handle case when RF < number of zones.

pkg/ruler/ruler_ring_test.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,23 @@ func TestGetReplicationSetForListRule(t *testing.T) {
180180
"z2": {},
181181
},
182182
},
183+
"should succeed on 1 unhealthy instances in RF=3 zone replication enabled but only 1 zone": {
184+
ringInstances: map[string]ring.InstanceDesc{
185+
"instance-1": {Addr: "127.0.0.1", State: ring.ACTIVE, Timestamp: now.Unix(), Tokens: g.GenerateTokens(ring.NewDesc(), "instance-1", "z1", 128, true), Zone: "z1"},
186+
"instance-2": {Addr: "127.0.0.2", State: ring.ACTIVE, Timestamp: now.Add(-10 * time.Second).Unix(), Tokens: g.GenerateTokens(ring.NewDesc(), "instance-2", "z1", 128, true), Zone: "z1"},
187+
"instance-3": {Addr: "127.0.0.3", State: ring.ACTIVE, Timestamp: now.Add(-20 * time.Second).Unix(), Tokens: g.GenerateTokens(ring.NewDesc(), "instance-3", "z1", 128, true), Zone: "z1"},
188+
"instance-4": {Addr: "127.0.0.4", State: ring.PENDING, Timestamp: now.Add(-30 * time.Second).Unix(), Tokens: g.GenerateTokens(ring.NewDesc(), "instance-4", "z1", 128, true), Zone: "z1"},
189+
},
190+
ringHeartbeatTimeout: time.Minute,
191+
ringReplicationFactor: 3,
192+
expectedSet: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3"},
193+
enableAZReplication: true,
194+
expectedFailedZones: map[string]struct{}{
195+
"z1": {},
196+
},
197+
expectedMaxUnavailableZones: 0,
198+
expectedMaxError: 1,
199+
},
183200
}
184201

185202
for testName, testData := range tests {

pkg/ruler/ruler_test.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -473,6 +473,11 @@ func TestGetRules(t *testing.T) {
473473
"ruler2": "b",
474474
"ruler3": "c",
475475
}
476+
rulerAZSingleZone := map[string]string{
477+
"ruler1": "a",
478+
"ruler2": "a",
479+
"ruler3": "a",
480+
}
476481

477482
expectedRules := expectedRulesMap{
478483
"ruler1": map[string]rulespb.RuleGroupList{
@@ -711,6 +716,24 @@ func TestGetRules(t *testing.T) {
711716
},
712717
expectedClientCallCount: 2, // one of the ruler is pending, so we don't expect that ruler to be called
713718
},
719+
"Shuffle Sharding and ShardSize = 3 and AZ replication with API Rules backup enabled and one ruler in pending state and rulers are in same az": {
720+
sharding: true,
721+
shuffleShardSize: 3,
722+
shardingStrategy: util.ShardingStrategyShuffle,
723+
enableZoneAwareReplication: true,
724+
rulerStateMap: rulerStateMapOnePending,
725+
rulerAZMap: rulerAZSingleZone,
726+
replicationFactor: 3,
727+
rulesRequest: RulesRequest{
728+
Type: recordingRuleFilter,
729+
},
730+
expectedCount: map[string]int{
731+
"user1": 3,
732+
"user2": 5,
733+
"user3": 1,
734+
},
735+
expectedClientCallCount: 2, // one of the ruler is pending, so we don't expect that ruler to be called
736+
},
714737
"Shuffle Sharding and ShardSize = 3 and AZ replication with API Rules backup enabled and two ruler in pending state": {
715738
sharding: true,
716739
shuffleShardSize: 3,

0 commit comments

Comments
 (0)