Skip to content

Commit 21483eb

Browse files
committed
Improved error message when there are not enough healthy instances for the replication set
Signed-off-by: Marco Pracucci <[email protected]>
1 parent 624c9eb commit 21483eb

File tree

5 files changed

+14
-8
lines changed

5 files changed

+14
-8
lines changed

pkg/ring/replication_strategy.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ type ReplicationStrategy interface {
99
// Filter out unhealthy instances and checks if there're enough instances
1010
// for an operation to succeed. Returns an error if there are not enough
1111
// instances.
12-
Filter(instances []IngesterDesc, op Operation, replicationFactor int, heartbeatTimeout time.Duration) (healthy []IngesterDesc, maxFailures int, err error)
12+
Filter(instances []IngesterDesc, op Operation, replicationFactor int, heartbeatTimeout time.Duration, zoneAwarenessEnabled bool) (healthy []IngesterDesc, maxFailures int, err error)
1313

1414
// ShouldExtendReplicaSet returns true if given an instance that's going to be
1515
// added to the replica set, the replica set size should be extended by 1
@@ -25,7 +25,7 @@ type DefaultReplicationStrategy struct{}
2525
// - Filters out dead ingesters so the one doesn't even try to write to them.
2626
// - Checks there is enough ingesters for an operation to succeed.
2727
// The ingesters argument may be overwritten.
28-
func (s *DefaultReplicationStrategy) Filter(ingesters []IngesterDesc, op Operation, replicationFactor int, heartbeatTimeout time.Duration) ([]IngesterDesc, int, error) {
28+
func (s *DefaultReplicationStrategy) Filter(ingesters []IngesterDesc, op Operation, replicationFactor int, heartbeatTimeout time.Duration, zoneAwarenessEnabled bool) ([]IngesterDesc, int, error) {
2929
// We need a response from a quorum of ingesters, which is n/2 + 1. In the
3030
// case of a node joining/leaving, the actual replica set might be bigger
3131
// than the replication factor, so use the bigger or the two.
@@ -49,8 +49,14 @@ func (s *DefaultReplicationStrategy) Filter(ingesters []IngesterDesc, op Operati
4949
// This is just a shortcut - if there are not minSuccess available ingesters,
5050
// after filtering out dead ones, don't even bother trying.
5151
if len(ingesters) < minSuccess {
52-
err := fmt.Errorf("at least %d live replicas required, could only find %d",
53-
minSuccess, len(ingesters))
52+
var err error
53+
54+
if zoneAwarenessEnabled {
55+
err = fmt.Errorf("at least %d live replicas required across different availability zones, could only find %d", minSuccess, len(ingesters))
56+
} else {
57+
err = fmt.Errorf("at least %d live replicas required, could only find %d", minSuccess, len(ingesters))
58+
}
59+
5460
return nil, 0, err
5561
}
5662

pkg/ring/replication_strategy_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ func TestRingReplicationStrategy(t *testing.T) {
9191

9292
t.Run(fmt.Sprintf("[%d]", i), func(t *testing.T) {
9393
strategy := &DefaultReplicationStrategy{}
94-
liveIngesters, maxFailure, err := strategy.Filter(ingesters, tc.op, tc.RF, 100*time.Second)
94+
liveIngesters, maxFailure, err := strategy.Filter(ingesters, tc.op, tc.RF, 100*time.Second, false)
9595
if tc.ExpectedError == "" {
9696
assert.NoError(t, err)
9797
assert.Equal(t, tc.LiveIngesters, len(liveIngesters))

pkg/ring/ring.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ func (r *Ring) Get(key uint32, op Operation, buf []IngesterDesc) (ReplicationSet
263263
ingesters = append(ingesters, ingester)
264264
}
265265

266-
liveIngesters, maxFailure, err := r.strategy.Filter(ingesters, op, r.cfg.ReplicationFactor, r.cfg.HeartbeatTimeout)
266+
liveIngesters, maxFailure, err := r.strategy.Filter(ingesters, op, r.cfg.ReplicationFactor, r.cfg.HeartbeatTimeout, r.cfg.ZoneAwarenessEnabled)
267267
if err != nil {
268268
return ReplicationSet{}, err
269269
}

pkg/ring/ring_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,7 @@ func TestRing_Get_ZoneAwareness(t *testing.T) {
147147
numZones: 1,
148148
replicationFactor: 3,
149149
zoneAwarenessEnabled: true,
150-
expectedErr: "at least 2 live replicas required, could only find 1",
150+
expectedErr: "at least 2 live replicas required across different availability zones, could only find 1",
151151
},
152152
"should succeed if there are ingesters in 2 zones on RF = 3": {
153153
numIngesters: 16,

pkg/storegateway/replication_strategy.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ import (
99

1010
type BlocksReplicationStrategy struct{}
1111

12-
func (s *BlocksReplicationStrategy) Filter(instances []ring.IngesterDesc, op ring.Operation, replicationFactor int, heartbeatTimeout time.Duration) ([]ring.IngesterDesc, int, error) {
12+
func (s *BlocksReplicationStrategy) Filter(instances []ring.IngesterDesc, op ring.Operation, _ int, heartbeatTimeout time.Duration, _ bool) ([]ring.IngesterDesc, int, error) {
1313
// Filter out unhealthy instances.
1414
for i := 0; i < len(instances); {
1515
if instances[i].IsHealthy(op, heartbeatTimeout) {

0 commit comments

Comments
 (0)