@@ -4,11 +4,13 @@ package ring
4
4
5
5
import (
6
6
"context"
7
+ "crypto/md5"
8
+ "encoding/binary"
7
9
"errors"
8
10
"flag"
9
11
"fmt"
10
12
"math"
11
- "sort "
13
+ "math/rand "
12
14
"sync"
13
15
"time"
14
16
@@ -107,9 +109,14 @@ type Ring struct {
107
109
KVClient kv.Client
108
110
strategy ReplicationStrategy
109
111
110
- mtx sync.RWMutex
111
- ringDesc * Desc
112
- ringTokens []TokenDesc
112
+ mtx sync.RWMutex
113
+ ringDesc * Desc
114
+ ringTokens []TokenDesc
115
+ ringTokensByZone map [string ][]TokenDesc
116
+
117
+ // List of zones for which there's at least 1 instance in the ring. This list is guaranteed
118
+ // to be sorted alphabetically.
119
+ ringZones []string
113
120
114
121
memberOwnershipDesc * prometheus.Desc
115
122
numMembersDesc * prometheus.Desc
@@ -190,11 +197,15 @@ func (r *Ring) loop(ctx context.Context) error {
190
197
191
198
ringDesc := value .(* Desc )
192
199
ringTokens := ringDesc .getTokens ()
200
+ ringTokensByZone := ringDesc .getTokensByZone ()
201
+ ringZones := getZones (ringTokensByZone )
193
202
194
203
r .mtx .Lock ()
195
204
defer r .mtx .Unlock ()
196
205
r .ringDesc = ringDesc
197
206
r .ringTokens = ringTokens
207
+ r .ringTokensByZone = ringTokensByZone
208
+ r .ringZones = ringZones
198
209
return true
199
210
})
200
211
return nil
@@ -213,7 +224,7 @@ func (r *Ring) Get(key uint32, op Operation, buf []IngesterDesc) (ReplicationSet
213
224
ingesters = buf [:0 ]
214
225
distinctHosts = map [string ]struct {}{}
215
226
distinctZones = map [string ]struct {}{}
216
- start = r . search ( key )
227
+ start = searchToken ( r . ringTokens , key )
217
228
iterations = 0
218
229
)
219
230
for i := start ; len (distinctHosts ) < n && iterations < len (r .ringTokens ); i ++ {
@@ -290,16 +301,6 @@ func (r *Ring) GetAll(op Operation) (ReplicationSet, error) {
290
301
}, nil
291
302
}
292
303
293
- func (r * Ring ) search (key uint32 ) int {
294
- i := sort .Search (len (r .ringTokens ), func (x int ) bool {
295
- return r .ringTokens [x ].Token > key
296
- })
297
- if i >= len (r .ringTokens ) {
298
- i = 0
299
- }
300
- return i
301
- }
302
-
303
304
// Describe implements prometheus.Collector.
304
305
func (r * Ring ) Describe (ch chan <- * prometheus.Desc ) {
305
306
ch <- r .memberOwnershipDesc
@@ -411,7 +412,7 @@ func (r *Ring) Subring(key uint32, n int) ReadRing {
411
412
var (
412
413
ingesters = make (map [string ]IngesterDesc , n )
413
414
distinctHosts = map [string ]struct {}{}
414
- start = r . search ( key )
415
+ start = searchToken ( r . ringTokens , key )
415
416
iterations = 0
416
417
)
417
418
@@ -464,6 +465,101 @@ func (r *Ring) Subring(key uint32, n int) ReadRing {
464
465
return sub
465
466
}
466
467
468
+ // ShuffleShard returns a subring for the provided identifier (eg. a tenant ID)
469
+ // and size (number of instances). The size is expected to be a multiple of the
470
+ // number of zones and the returned subring will contain the same number of
471
+ // instances per zone as far as there are enough registered instances in the ring.
472
+ //
473
+ // The algorithm used to build the subring is a shuffle sharder based on probabilistic
474
+ // hashing. We treat each zone as a separate ring and pick N unique replicas from each
475
+ // zone, walking the ring starting from random but predictable numbers. The random
476
+ // generator is initialised with a seed based on the provided identifier.
477
+ //
478
+ // This implementation guarantees:
479
+ // - Stability: given the same ring, two invocations returns the same result.
480
+ // - Consistency: adding/removing 1 instance from the ring generates a resulting
481
+ // subring with no more then 1 difference.
482
+ // - Shuffling: probabilistically, for a large enough cluster each identifier gets
483
+ // a different set of instances, with a reduced number of overlapping instances
484
+ // between two identifiers.
485
+ func (r * Ring ) ShuffleShard (identifier string , size int ) ReadRing {
486
+ // Nothing to do if the shard size is not smaller then the actual ring.
487
+ if size <= 0 || r .IngesterCount () <= size {
488
+ return r
489
+ }
490
+
491
+ // Use the identifier to compute an hash we'll use to seed the random.
492
+ hasher := md5 .New ()
493
+ hasher .Write ([]byte (identifier )) // nolint:errcheck
494
+ checksum := hasher .Sum (nil )
495
+
496
+ // Generate the seed based on the first 64 bits of the checksum.
497
+ seed := int64 (binary .BigEndian .Uint64 (checksum ))
498
+
499
+ // Initialise the random generator used to select instances in the ring.
500
+ random := rand .New (rand .NewSource (seed ))
501
+
502
+ r .mtx .RLock ()
503
+
504
+ // We expect the shard size to be divisible by the number of zones, in order to
505
+ // have nodes balanced across zones. If it's not, we do round up.
506
+ numInstancesPerZone := int (math .Ceil (float64 (size ) / float64 (len (r .ringZones ))))
507
+
508
+ shard := make (map [string ]IngesterDesc , size )
509
+
510
+ // We need to iterate zones always in the same order to guarantee stability.
511
+ for _ , zone := range r .ringZones {
512
+ tokens := r .ringTokensByZone [zone ]
513
+
514
+ // To select one more instance while guaranteeing the "consistency" property,
515
+ // we do pick a random value from the generator and resolve uniqueness collisions
516
+ // (if any) continuing walking the ring.
517
+ for i := 0 ; i < numInstancesPerZone ; i ++ {
518
+ start := searchToken (tokens , random .Uint32 ())
519
+ iterations := 0
520
+ found := false
521
+
522
+ for p := start ; iterations < len (tokens ); p ++ {
523
+ iterations ++
524
+
525
+ // Wrap p around in the ring.
526
+ p %= len (tokens )
527
+
528
+ // Ensure we select an unique instance.
529
+ if _ , ok := shard [tokens [p ].Ingester ]; ok {
530
+ continue
531
+ }
532
+
533
+ shard [tokens [p ].Ingester ] = r .ringDesc .Ingesters [tokens [p ].Ingester ]
534
+ found = true
535
+ break
536
+ }
537
+
538
+ // If one more instance has not been found, we can stop looking for
539
+ // more instances in this zone, because it means the zone has no more
540
+ // instances which haven't been already selected.
541
+ if ! found {
542
+ break
543
+ }
544
+ }
545
+ }
546
+
547
+ r .mtx .RUnlock ()
548
+
549
+ // Build a read-only ring for the shard.
550
+ shardDesc := & Desc {Ingesters : shard }
551
+ shardTokensByZone := shardDesc .getTokensByZone ()
552
+
553
+ return & Ring {
554
+ cfg : r .cfg ,
555
+ strategy : r .strategy ,
556
+ ringDesc : shardDesc ,
557
+ ringTokens : shardDesc .getTokens (),
558
+ ringTokensByZone : shardTokensByZone ,
559
+ ringZones : getZones (shardTokensByZone ),
560
+ }
561
+ }
562
+
467
563
// GetInstanceState returns the current state of an instance or an error if the
468
564
// instance does not exist in the ring.
469
565
func (r * Ring ) GetInstanceState (instanceID string ) (IngesterState , error ) {
0 commit comments