diff --git a/CHANGELOG.md b/CHANGELOG.md index cadba2e6eed..a9d172a2468 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,7 +26,8 @@ * [BUGFIX] Set appropriate `Content-Type` header for /services endpoint, which previously hard-coded `text/plain`. #4596 * [BUGFIX] Querier: Disable query scheduler SRV DNS lookup, which removes noisy log messages about "failed DNS SRV record lookup". #4601 * [BUGFIX] Memberlist: fixed corrupted packets when sending compound messages with more than 255 messages or messages bigger than 64KB. #4601 -* [BUGIX] Query Frontend: If 'LogQueriesLongerThan' is set to < 0, log all queries as described in the docs. #4633 +* [BUGFIX] Query Frontend: If 'LogQueriesLongerThan' is set to < 0, log all queries as described in the docs. #4633 +* [BUGFIX] Distributor: update defaultReplicationStrategy to not fail with extend-write when a single instance is unhealthy. #4636 ## 1.11.0 2021-11-25 diff --git a/pkg/ring/replication_strategy.go b/pkg/ring/replication_strategy.go index 44e05a53833..32eff1384a5 100644 --- a/pkg/ring/replication_strategy.go +++ b/pkg/ring/replication_strategy.go @@ -26,19 +26,9 @@ func NewDefaultReplicationStrategy() ReplicationStrategy { // - Checks there are enough instances for an operation to succeed. // The instances argument may be overwritten. func (s *defaultReplicationStrategy) Filter(instances []InstanceDesc, op Operation, replicationFactor int, heartbeatTimeout time.Duration, zoneAwarenessEnabled bool) ([]InstanceDesc, int, error) { - // We need a response from a quorum of instances, which is n/2 + 1. In the - // case of a node joining/leaving, the actual replica set might be bigger - // than the replication factor, so use the bigger or the two. - if len(instances) > replicationFactor { - replicationFactor = len(instances) - } - - minSuccess := (replicationFactor / 2) + 1 now := time.Now() - // Skip those that have not heartbeated in a while. NB these are still - // included in the calculation of minSuccess, so if too many failed instances - // will cause the whole write to fail. + // Skip those that have not heartbeated in a while. var unhealthy []string for i := 0; i < len(instances); { if instances[i].IsHealthy(op, heartbeatTimeout, now) { @@ -49,6 +39,14 @@ func (s *defaultReplicationStrategy) Filter(instances []InstanceDesc, op Operati } } + // We need a response from a quorum of instances, which is n/2 + 1. In the + // case of a node joining/leaving with extend-writes enabled, the actual replica + // set will be bigger than the replication factor, so use the bigger or the two. + if len(instances) > replicationFactor { + replicationFactor = len(instances) + } + + minSuccess := (replicationFactor / 2) + 1 // This is just a shortcut - if there are not minSuccess available instances, // after filtering out dead ones, don't even bother trying. if len(instances) < minSuccess { diff --git a/pkg/ring/replication_strategy_test.go b/pkg/ring/replication_strategy_test.go index 1fe5d0e9187..75af12671ce 100644 --- a/pkg/ring/replication_strategy_test.go +++ b/pkg/ring/replication_strategy_test.go @@ -68,14 +68,14 @@ func TestRingReplicationStrategy(t *testing.T) { replicationFactor: 3, liveIngesters: 3, deadIngesters: 1, - expectedMaxFailure: 0, + expectedMaxFailure: 1, }, { - replicationFactor: 3, - liveIngesters: 2, - deadIngesters: 2, - expectedError: "at least 3 live replicas required, could only find 2 - unhealthy instances: dead1,dead2", + replicationFactor: 3, + liveIngesters: 2, + deadIngesters: 2, + expectedMaxFailure: 0, }, } { ingesters := []InstanceDesc{}