diff --git a/pkg/alertmanager/alertmanager_client.go b/pkg/alertmanager/alertmanager_client.go index 51ce369c39a..57571a31464 100644 --- a/pkg/alertmanager/alertmanager_client.go +++ b/pkg/alertmanager/alertmanager_client.go @@ -7,7 +7,6 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/crypto/tls" "github.com/grafana/dskit/grpcclient" - "github.com/grafana/dskit/ring/client" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" @@ -15,6 +14,7 @@ import ( "google.golang.org/grpc/health/grpc_health_v1" "github.com/cortexproject/cortex/pkg/alertmanager/alertmanagerpb" + "github.com/cortexproject/cortex/pkg/ring/client" ) // ClientsPool is the interface used to get the client from the pool for a specified address. diff --git a/pkg/alertmanager/alertmanager_ring.go b/pkg/alertmanager/alertmanager_ring.go index 6c0eefc7c8b..93daf36850c 100644 --- a/pkg/alertmanager/alertmanager_ring.go +++ b/pkg/alertmanager/alertmanager_ring.go @@ -10,8 +10,8 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" + "github.com/cortexproject/cortex/pkg/ring" util_log "github.com/cortexproject/cortex/pkg/util/log" ) diff --git a/pkg/alertmanager/alertmanager_ring_test.go b/pkg/alertmanager/alertmanager_ring_test.go index 266e28659ac..3e4d460252e 100644 --- a/pkg/alertmanager/alertmanager_ring_test.go +++ b/pkg/alertmanager/alertmanager_ring_test.go @@ -4,8 +4,9 @@ import ( "testing" "time" - "github.com/grafana/dskit/ring" "github.com/stretchr/testify/assert" + + "github.com/cortexproject/cortex/pkg/ring" ) func TestIsHealthyForAlertmanagerOperations(t *testing.T) { diff --git a/pkg/alertmanager/distributor.go b/pkg/alertmanager/distributor.go index b8db92f6a98..53112160a1d 100644 --- a/pkg/alertmanager/distributor.go +++ b/pkg/alertmanager/distributor.go @@ -12,8 +12,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/opentracing/opentracing-go" "github.com/pkg/errors" @@ -22,6 +20,8 @@ import ( "github.com/weaveworks/common/user" "github.com/cortexproject/cortex/pkg/alertmanager/merger" + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/ring/client" "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" util_log "github.com/cortexproject/cortex/pkg/util/log" diff --git a/pkg/alertmanager/distributor_test.go b/pkg/alertmanager/distributor_test.go index 6ea6edb19d1..304ceebea61 100644 --- a/pkg/alertmanager/distributor_test.go +++ b/pkg/alertmanager/distributor_test.go @@ -17,7 +17,6 @@ import ( "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" @@ -28,6 +27,7 @@ import ( "google.golang.org/grpc/health/grpc_health_v1" "github.com/cortexproject/cortex/pkg/alertmanager/alertmanagerpb" + "github.com/cortexproject/cortex/pkg/ring" util_log "github.com/cortexproject/cortex/pkg/util/log" "github.com/cortexproject/cortex/pkg/util/test" ) diff --git a/pkg/alertmanager/lifecycle.go b/pkg/alertmanager/lifecycle.go index b3a9e894045..54e420701a4 100644 --- a/pkg/alertmanager/lifecycle.go +++ b/pkg/alertmanager/lifecycle.go @@ -1,7 +1,7 @@ package 
alertmanager import ( - "github.com/grafana/dskit/ring" + "github.com/cortexproject/cortex/pkg/ring" ) func (r *MultitenantAlertmanager) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.Desc, instanceExists bool, instanceID string, instanceDesc ring.InstanceDesc) (ring.InstanceState, ring.Tokens) { diff --git a/pkg/alertmanager/multitenant.go b/pkg/alertmanager/multitenant.go index 46fa8a8931d..2e229fbec7e 100644 --- a/pkg/alertmanager/multitenant.go +++ b/pkg/alertmanager/multitenant.go @@ -18,8 +18,6 @@ import ( "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/alertmanager/cluster" @@ -36,6 +34,8 @@ import ( "github.com/cortexproject/cortex/pkg/alertmanager/alertmanagerpb" "github.com/cortexproject/cortex/pkg/alertmanager/alertspb" "github.com/cortexproject/cortex/pkg/alertmanager/alertstore" + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/ring/client" "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" util_log "github.com/cortexproject/cortex/pkg/util/log" diff --git a/pkg/alertmanager/multitenant_test.go b/pkg/alertmanager/multitenant_test.go index 068213599ae..1e676c3da1d 100644 --- a/pkg/alertmanager/multitenant_test.go +++ b/pkg/alertmanager/multitenant_test.go @@ -23,7 +23,6 @@ import ( "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/prometheus/alertmanager/cluster/clusterpb" "github.com/prometheus/alertmanager/notify" @@ -45,6 +44,7 @@ import ( "github.com/cortexproject/cortex/pkg/alertmanager/alertspb" "github.com/cortexproject/cortex/pkg/alertmanager/alertstore" "github.com/cortexproject/cortex/pkg/alertmanager/alertstore/bucketclient" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/storage/bucket" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/test" diff --git a/pkg/api/api.go b/pkg/api/api.go index d7635d2c56d..ced7685448f 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -12,7 +12,6 @@ import ( "github.com/felixge/fgprof" "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/grafana/dskit/ring" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/storage" "github.com/weaveworks/common/middleware" @@ -31,6 +30,7 @@ import ( "github.com/cortexproject/cortex/pkg/frontend/v2/frontendv2pb" "github.com/cortexproject/cortex/pkg/ingester/client" "github.com/cortexproject/cortex/pkg/querier" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/ruler" "github.com/cortexproject/cortex/pkg/scheduler" "github.com/cortexproject/cortex/pkg/scheduler/schedulerpb" diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 337ec76c687..75b83fc1e17 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -17,7 +17,6 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/backoff" "github.com/grafana/dskit/flagext" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -29,6 +28,7 @@ import ( "github.com/thanos-io/thanos/pkg/compact/downsample" 
"github.com/thanos-io/thanos/pkg/objstore" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/storage/bucket" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" diff --git a/pkg/compactor/compactor_ring.go b/pkg/compactor/compactor_ring.go index b0683c02dda..28b20e6734c 100644 --- a/pkg/compactor/compactor_ring.go +++ b/pkg/compactor/compactor_ring.go @@ -8,8 +8,8 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" + "github.com/cortexproject/cortex/pkg/ring" util_log "github.com/cortexproject/cortex/pkg/util/log" ) diff --git a/pkg/compactor/compactor_ring_test.go b/pkg/compactor/compactor_ring_test.go index 0ef8f10bdbc..3a70ccde2b9 100644 --- a/pkg/compactor/compactor_ring_test.go +++ b/pkg/compactor/compactor_ring_test.go @@ -5,8 +5,9 @@ import ( "time" "github.com/grafana/dskit/flagext" - "github.com/grafana/dskit/ring" "github.com/stretchr/testify/assert" + + "github.com/cortexproject/cortex/pkg/ring" ) func TestRingConfig_DefaultConfigToLifecyclerConfig(t *testing.T) { diff --git a/pkg/compactor/compactor_test.go b/pkg/compactor/compactor_test.go index ebb2f956b87..61b31a511fd 100644 --- a/pkg/compactor/compactor_test.go +++ b/pkg/compactor/compactor_test.go @@ -21,7 +21,6 @@ import ( "github.com/grafana/dskit/concurrency" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -37,6 +36,7 @@ import ( "github.com/thanos-io/thanos/pkg/objstore" "gopkg.in/yaml.v2" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/storage/bucket" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" cortex_testutil "github.com/cortexproject/cortex/pkg/util/test" diff --git a/pkg/cortex/cortex.go b/pkg/cortex/cortex.go index 31ff0d67281..0a6ca6c2c1f 100644 --- a/pkg/cortex/cortex.go +++ b/pkg/cortex/cortex.go @@ -16,7 +16,6 @@ import ( "github.com/grafana/dskit/grpcutil" "github.com/grafana/dskit/kv/memberlist" "github.com/grafana/dskit/modules" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/runtimeconfig" "github.com/grafana/dskit/services" "github.com/pkg/errors" @@ -51,6 +50,7 @@ import ( "github.com/cortexproject/cortex/pkg/querier/queryrange" "github.com/cortexproject/cortex/pkg/querier/tenantfederation" querier_worker "github.com/cortexproject/cortex/pkg/querier/worker" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/ruler" "github.com/cortexproject/cortex/pkg/ruler/rulestore" "github.com/cortexproject/cortex/pkg/scheduler" diff --git a/pkg/cortex/cortex_test.go b/pkg/cortex/cortex_test.go index 1584b27854e..7c7c2c615c4 100644 --- a/pkg/cortex/cortex_test.go +++ b/pkg/cortex/cortex_test.go @@ -14,7 +14,6 @@ import ( "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" @@ -27,6 +26,7 @@ import ( "github.com/cortexproject/cortex/pkg/chunk/storage" "github.com/cortexproject/cortex/pkg/frontend/v1/frontendv1pb" "github.com/cortexproject/cortex/pkg/ingester" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/ruler" "github.com/cortexproject/cortex/pkg/scheduler/schedulerpb" 
"github.com/cortexproject/cortex/pkg/storage/bucket" diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index ffe4f01ea81..b5cc4c921cf 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -12,7 +12,6 @@ import ( "github.com/grafana/dskit/kv/codec" "github.com/grafana/dskit/kv/memberlist" "github.com/grafana/dskit/modules" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/runtimeconfig" "github.com/grafana/dskit/services" "github.com/opentracing-contrib/go-stdlib/nethttp" @@ -44,6 +43,7 @@ import ( "github.com/cortexproject/cortex/pkg/querier/queryrange" "github.com/cortexproject/cortex/pkg/querier/tenantfederation" querier_worker "github.com/cortexproject/cortex/pkg/querier/worker" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/ruler" "github.com/cortexproject/cortex/pkg/scheduler" "github.com/cortexproject/cortex/pkg/storegateway" diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index 2620256080e..2088f28a4f9 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -12,8 +12,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/limiter" - "github.com/grafana/dskit/ring" - ring_client "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/opentracing/opentracing-go" "github.com/pkg/errors" @@ -31,6 +29,8 @@ import ( "github.com/cortexproject/cortex/pkg/cortexpb" ingester_client "github.com/cortexproject/cortex/pkg/ingester/client" "github.com/cortexproject/cortex/pkg/prom1/storage/metric" + "github.com/cortexproject/cortex/pkg/ring" + ring_client "github.com/cortexproject/cortex/pkg/ring/client" "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/extract" diff --git a/pkg/distributor/distributor_ring.go b/pkg/distributor/distributor_ring.go index ceefa198c2d..e1e6135d2cd 100644 --- a/pkg/distributor/distributor_ring.go +++ b/pkg/distributor/distributor_ring.go @@ -8,8 +8,8 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" + "github.com/cortexproject/cortex/pkg/ring" util_log "github.com/cortexproject/cortex/pkg/util/log" ) diff --git a/pkg/distributor/distributor_ring_test.go b/pkg/distributor/distributor_ring_test.go index 15d696fb819..b34a8326c82 100644 --- a/pkg/distributor/distributor_ring_test.go +++ b/pkg/distributor/distributor_ring_test.go @@ -5,8 +5,9 @@ import ( "time" "github.com/grafana/dskit/flagext" - "github.com/grafana/dskit/ring" "github.com/stretchr/testify/assert" + + "github.com/cortexproject/cortex/pkg/ring" ) func TestRingConfig_DefaultConfigToLifecyclerConfig(t *testing.T) { diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index 8957f754b51..17288af26a8 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -18,8 +18,6 @@ import ( "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" - ring_client "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" @@ -39,6 +37,8 @@ import ( "github.com/cortexproject/cortex/pkg/ingester" "github.com/cortexproject/cortex/pkg/ingester/client" 
"github.com/cortexproject/cortex/pkg/prom1/storage/metric" + "github.com/cortexproject/cortex/pkg/ring" + ring_client "github.com/cortexproject/cortex/pkg/ring/client" "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/chunkcompat" diff --git a/pkg/distributor/ha_tracker_test.go b/pkg/distributor/ha_tracker_test.go index 4da4a9906ee..cd7bd5dfe92 100644 --- a/pkg/distributor/ha_tracker_test.go +++ b/pkg/distributor/ha_tracker_test.go @@ -11,7 +11,6 @@ import ( "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -23,6 +22,7 @@ import ( "github.com/weaveworks/common/user" "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/util" util_log "github.com/cortexproject/cortex/pkg/util/log" "github.com/cortexproject/cortex/pkg/util/test" diff --git a/pkg/distributor/ingester_client_pool.go b/pkg/distributor/ingester_client_pool.go index 6f97702e75c..007d8872ebb 100644 --- a/pkg/distributor/ingester_client_pool.go +++ b/pkg/distributor/ingester_client_pool.go @@ -5,10 +5,11 @@ import ( "time" "github.com/go-kit/log" - "github.com/grafana/dskit/ring" - ring_client "github.com/grafana/dskit/ring/client" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/cortexproject/cortex/pkg/ring" + ring_client "github.com/cortexproject/cortex/pkg/ring/client" ) var clients = promauto.NewGauge(prometheus.GaugeOpts{ diff --git a/pkg/distributor/query.go b/pkg/distributor/query.go index da22f35187b..e2d3e42d88a 100644 --- a/pkg/distributor/query.go +++ b/pkg/distributor/query.go @@ -7,7 +7,6 @@ import ( "time" "github.com/grafana/dskit/grpcutil" - "github.com/grafana/dskit/ring" "github.com/opentracing/opentracing-go" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" @@ -16,6 +15,7 @@ import ( "github.com/cortexproject/cortex/pkg/cortexpb" ingester_client "github.com/cortexproject/cortex/pkg/ingester/client" "github.com/cortexproject/cortex/pkg/querier/stats" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/extract" diff --git a/pkg/ingester/flush_test.go b/pkg/ingester/flush_test.go index 8b7a3f7463e..16d873f7aa0 100644 --- a/pkg/ingester/flush_test.go +++ b/pkg/ingester/flush_test.go @@ -10,7 +10,6 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" @@ -21,6 +20,7 @@ import ( "github.com/cortexproject/cortex/pkg/chunk" "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/validation" ) diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 10275e08274..9dd65d0add0 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -13,7 +13,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/gogo/status" - "github.com/grafana/dskit/ring" 
"github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -29,6 +28,7 @@ import ( cortex_chunk "github.com/cortexproject/cortex/pkg/chunk" "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index 9fd6c7808a3..5b25de6487c 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -17,7 +17,6 @@ import ( "time" "github.com/go-kit/log" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" @@ -33,6 +32,7 @@ import ( promchunk "github.com/cortexproject/cortex/pkg/chunk/encoding" "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/util/chunkcompat" "github.com/cortexproject/cortex/pkg/util/test" "github.com/cortexproject/cortex/pkg/util/validation" diff --git a/pkg/ingester/ingester_v2.go b/pkg/ingester/ingester_v2.go index 887459df58b..0924e0874cc 100644 --- a/pkg/ingester/ingester_v2.go +++ b/pkg/ingester/ingester_v2.go @@ -14,7 +14,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/concurrency" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -36,6 +35,7 @@ import ( "github.com/cortexproject/cortex/pkg/chunk/encoding" "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/storage/bucket" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/tenant" diff --git a/pkg/ingester/ingester_v2_test.go b/pkg/ingester/ingester_v2_test.go index cc5ac10b720..cd7c379a9d8 100644 --- a/pkg/ingester/ingester_v2_test.go +++ b/pkg/ingester/ingester_v2_test.go @@ -21,7 +21,6 @@ import ( "time" "github.com/go-kit/log" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -46,6 +45,7 @@ import ( "github.com/cortexproject/cortex/pkg/chunk/encoding" "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/ring" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/util" util_math "github.com/cortexproject/cortex/pkg/util/math" diff --git a/pkg/ingester/lifecycle_test.go b/pkg/ingester/lifecycle_test.go index ed3bf53a340..78b38236fc3 100644 --- a/pkg/ingester/lifecycle_test.go +++ b/pkg/ingester/lifecycle_test.go @@ -14,7 +14,6 @@ import ( "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" @@ -27,6 +26,7 @@ import ( "github.com/cortexproject/cortex/pkg/chunk" "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/ring" 
"github.com/cortexproject/cortex/pkg/util/test" "github.com/cortexproject/cortex/pkg/util/validation" ) diff --git a/pkg/ingester/transfer.go b/pkg/ingester/transfer.go index d2466cbaff0..a0a3ab2a610 100644 --- a/pkg/ingester/transfer.go +++ b/pkg/ingester/transfer.go @@ -10,7 +10,6 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/backoff" - "github.com/grafana/dskit/ring" "github.com/pkg/errors" "github.com/prometheus/common/model" "github.com/weaveworks/common/user" @@ -18,6 +17,7 @@ import ( "github.com/cortexproject/cortex/pkg/chunk/encoding" "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/cortexproject/cortex/pkg/ingester/client" + "github.com/cortexproject/cortex/pkg/ring" ) var ( diff --git a/pkg/querier/blocks_store_balanced_set.go b/pkg/querier/blocks_store_balanced_set.go index 8f58c123d14..0494c054f5b 100644 --- a/pkg/querier/blocks_store_balanced_set.go +++ b/pkg/querier/blocks_store_balanced_set.go @@ -9,7 +9,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -17,6 +16,7 @@ import ( "github.com/thanos-io/thanos/pkg/discovery/dns" "github.com/thanos-io/thanos/pkg/extprom" + "github.com/cortexproject/cortex/pkg/ring/client" "github.com/cortexproject/cortex/pkg/util" ) diff --git a/pkg/querier/blocks_store_queryable.go b/pkg/querier/blocks_store_queryable.go index 01905687e50..6854595595a 100644 --- a/pkg/querier/blocks_store_queryable.go +++ b/pkg/querier/blocks_store_queryable.go @@ -13,7 +13,6 @@ import ( "github.com/go-kit/log/level" "github.com/gogo/protobuf/types" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -33,6 +32,7 @@ import ( "github.com/cortexproject/cortex/pkg/cortexpb" "github.com/cortexproject/cortex/pkg/querier/series" "github.com/cortexproject/cortex/pkg/querier/stats" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/storage/bucket" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/storage/tsdb/bucketindex" diff --git a/pkg/querier/blocks_store_replicated_set.go b/pkg/querier/blocks_store_replicated_set.go index 9ae4ce3f448..435a8aa60b2 100644 --- a/pkg/querier/blocks_store_replicated_set.go +++ b/pkg/querier/blocks_store_replicated_set.go @@ -6,13 +6,13 @@ import ( "math/rand" "github.com/go-kit/log" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/oklog/ulid" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/ring/client" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/storegateway" "github.com/cortexproject/cortex/pkg/util" diff --git a/pkg/querier/blocks_store_replicated_set_test.go b/pkg/querier/blocks_store_replicated_set_test.go index c7af92d8e06..c614946ac48 100644 --- a/pkg/querier/blocks_store_replicated_set_test.go +++ b/pkg/querier/blocks_store_replicated_set_test.go @@ -10,7 +10,6 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" @@ -18,6 +17,7 @@ import ( 
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/cortexproject/cortex/pkg/ring" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/test" diff --git a/pkg/querier/store_gateway_client.go b/pkg/querier/store_gateway_client.go index e5fa71e747d..528c3c2fd93 100644 --- a/pkg/querier/store_gateway_client.go +++ b/pkg/querier/store_gateway_client.go @@ -7,13 +7,13 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/crypto/tls" "github.com/grafana/dskit/grpcclient" - "github.com/grafana/dskit/ring/client" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "google.golang.org/grpc" "google.golang.org/grpc/health/grpc_health_v1" + "github.com/cortexproject/cortex/pkg/ring/client" "github.com/cortexproject/cortex/pkg/storegateway/storegatewaypb" ) diff --git a/pkg/querier/worker/scheduler_processor.go b/pkg/querier/worker/scheduler_processor.go index d881ea61410..ee0b2028425 100644 --- a/pkg/querier/worker/scheduler_processor.go +++ b/pkg/querier/worker/scheduler_processor.go @@ -11,7 +11,6 @@ import ( "github.com/grafana/dskit/backoff" "github.com/grafana/dskit/grpcclient" dsmiddleware "github.com/grafana/dskit/middleware" - "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" otgrpc "github.com/opentracing-contrib/go-grpc" "github.com/opentracing/opentracing-go" @@ -25,6 +24,7 @@ import ( "github.com/cortexproject/cortex/pkg/frontend/v2/frontendv2pb" querier_stats "github.com/cortexproject/cortex/pkg/querier/stats" + "github.com/cortexproject/cortex/pkg/ring/client" "github.com/cortexproject/cortex/pkg/scheduler/schedulerpb" "github.com/cortexproject/cortex/pkg/util/httpgrpcutil" util_log "github.com/cortexproject/cortex/pkg/util/log" diff --git a/vendor/github.com/grafana/dskit/ring/basic_lifecycler.go b/pkg/ring/basic_lifecycler.go similarity index 99% rename from vendor/github.com/grafana/dskit/ring/basic_lifecycler.go rename to pkg/ring/basic_lifecycler.go index 726a85430d3..c5a4d2df2fc 100644 --- a/vendor/github.com/grafana/dskit/ring/basic_lifecycler.go +++ b/pkg/ring/basic_lifecycler.go @@ -9,11 +9,10 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/pkg/errors" - "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/dskit/kv" "github.com/grafana/dskit/services" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" ) type BasicLifecyclerDelegate interface { diff --git a/vendor/github.com/grafana/dskit/ring/basic_lifecycler_delegates.go b/pkg/ring/basic_lifecycler_delegates.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/basic_lifecycler_delegates.go rename to pkg/ring/basic_lifecycler_delegates.go diff --git a/pkg/ring/basic_lifecycler_delegates_test.go b/pkg/ring/basic_lifecycler_delegates_test.go new file mode 100644 index 00000000000..0bdb2b0658c --- /dev/null +++ b/pkg/ring/basic_lifecycler_delegates_test.go @@ -0,0 +1,306 @@ +package ring + +import ( + "context" + "os" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/grafana/dskit/concurrency" + "github.com/grafana/dskit/services" + "github.com/grafana/dskit/test" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestLeaveOnStoppingDelegate(t *testing.T) { + onStoppingCalled := false + 
+ ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + + testDelegate := &mockDelegate{ + onStopping: func(l *BasicLifecycler) { + assert.Equal(t, LEAVING, l.GetState()) + onStoppingCalled = true + }, + } + + leaveDelegate := NewLeaveOnStoppingDelegate(testDelegate, log.NewNopLogger()) + lifecycler, _, err := prepareBasicLifecyclerWithDelegate(t, cfg, leaveDelegate) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + + assert.NoError(t, services.StopAndAwaitTerminated(ctx, lifecycler)) + assert.True(t, onStoppingCalled) +} + +func TestTokensPersistencyDelegate_ShouldSkipTokensLoadingIfFileDoesNotExist(t *testing.T) { + // Create a temporary file and immediately delete it. + tokensFile, err := os.CreateTemp("", "tokens-*") + require.NoError(t, err) + require.NoError(t, os.Remove(tokensFile.Name())) + + testDelegate := &mockDelegate{ + onRegister: func(lifecycler *BasicLifecycler, ringDesc Desc, instanceExists bool, instanceID string, instanceDesc InstanceDesc) (InstanceState, Tokens) { + assert.False(t, instanceExists) + return JOINING, Tokens{1, 2, 3, 4, 5} + }, + } + + logs := &concurrency.SyncBuffer{} + logger := log.NewLogfmtLogger(logs) + persistencyDelegate := NewTokensPersistencyDelegate(tokensFile.Name(), ACTIVE, testDelegate, logger) + + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + lifecycler, _, err := prepareBasicLifecyclerWithDelegate(t, cfg, persistencyDelegate) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, lifecycler) //nolint:errcheck + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + assert.Equal(t, JOINING, lifecycler.GetState()) + assert.Equal(t, Tokens{1, 2, 3, 4, 5}, lifecycler.GetTokens()) + assert.True(t, lifecycler.IsRegistered()) + + require.NoError(t, services.StopAndAwaitTerminated(ctx, lifecycler)) + + // Ensure tokens have been stored. + actualTokens, err := LoadTokensFromFile(tokensFile.Name()) + require.NoError(t, err) + assert.Equal(t, Tokens{1, 2, 3, 4, 5}, actualTokens) + + // Ensure no error has been logged. + assert.Empty(t, logs.String()) +} + +func TestTokensPersistencyDelegate_ShouldLoadTokensFromFileIfFileExist(t *testing.T) { + tokensFile, err := os.CreateTemp("", "tokens-*") + require.NoError(t, err) + defer os.Remove(tokensFile.Name()) //nolint:errcheck + + // Store some tokens to the file. 
+ storedTokens := Tokens{6, 7, 8, 9, 10} + require.NoError(t, storedTokens.StoreToFile(tokensFile.Name())) + + testDelegate := &mockDelegate{ + onRegister: func(lifecycler *BasicLifecycler, ringDesc Desc, instanceExists bool, instanceID string, instanceDesc InstanceDesc) (InstanceState, Tokens) { + assert.True(t, instanceExists) + assert.Equal(t, ACTIVE, instanceDesc.GetState()) + assert.Equal(t, storedTokens, Tokens(instanceDesc.GetTokens())) + assert.True(t, instanceDesc.GetRegisteredAt().IsZero()) + + return instanceDesc.GetState(), instanceDesc.GetTokens() + }, + } + + persistencyDelegate := NewTokensPersistencyDelegate(tokensFile.Name(), ACTIVE, testDelegate, log.NewNopLogger()) + + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + lifecycler, _, err := prepareBasicLifecyclerWithDelegate(t, cfg, persistencyDelegate) + require.NoError(t, err) + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + assert.Equal(t, ACTIVE, lifecycler.GetState()) + assert.Equal(t, storedTokens, lifecycler.GetTokens()) + assert.True(t, lifecycler.IsRegistered()) + assert.InDelta(t, time.Now().Unix(), lifecycler.GetRegisteredAt().Unix(), 2) + + require.NoError(t, services.StopAndAwaitTerminated(ctx, lifecycler)) + + // Ensure we can still read back the tokens file. + actualTokens, err := LoadTokensFromFile(tokensFile.Name()) + require.NoError(t, err) + assert.Equal(t, storedTokens, actualTokens) +} + +func TestTokensPersistencyDelegate_ShouldHandleTheCaseTheInstanceIsAlreadyInTheRing(t *testing.T) { + storedTokens := Tokens{6, 7, 8, 9, 10} + differentTokens := Tokens{1, 2, 3, 4, 5} + + tests := map[string]struct { + storedTokens Tokens + initialState InstanceState + initialTokens Tokens + expectedState InstanceState + expectedTokens Tokens + }{ + "instance already registered in the ring without tokens": { + initialState: PENDING, + initialTokens: nil, + expectedState: ACTIVE, + expectedTokens: storedTokens, + }, + "instance already registered in the ring with tokens": { + initialState: JOINING, + initialTokens: differentTokens, + expectedState: JOINING, + expectedTokens: differentTokens, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + tokensFile, err := os.CreateTemp("", "tokens-*") + require.NoError(t, err) + defer os.Remove(tokensFile.Name()) //nolint:errcheck + + // Store some tokens to the file. + require.NoError(t, storedTokens.StoreToFile(tokensFile.Name())) + + // We assume it is already registered to the ring. + registeredAt := time.Now().Add(-time.Hour) + + testDelegate := &mockDelegate{ + onRegister: func(lifecycler *BasicLifecycler, ringDesc Desc, instanceExists bool, instanceID string, instanceDesc InstanceDesc) (InstanceState, Tokens) { + return instanceDesc.GetState(), instanceDesc.GetTokens() + }, + } + + persistencyDelegate := NewTokensPersistencyDelegate(tokensFile.Name(), ACTIVE, testDelegate, log.NewNopLogger()) + + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + lifecycler, store, err := prepareBasicLifecyclerWithDelegate(t, cfg, persistencyDelegate) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, lifecycler) //nolint:errcheck + + // Add the instance to the ring.
+ require.NoError(t, store.CAS(ctx, testRingKey, func(in interface{}) (out interface{}, retry bool, err error) { + ringDesc := NewDesc() + ringDesc.AddIngester(cfg.ID, cfg.Addr, cfg.Zone, testData.initialTokens, testData.initialState, registeredAt) + return ringDesc, true, nil + })) + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + assert.Equal(t, testData.expectedState, lifecycler.GetState()) + assert.Equal(t, testData.expectedTokens, lifecycler.GetTokens()) + assert.True(t, lifecycler.IsRegistered()) + assert.Equal(t, registeredAt.Unix(), lifecycler.GetRegisteredAt().Unix()) + }) + } +} + +// TestDelegatesChain tests chaining all provided delegates together. +func TestDelegatesChain(t *testing.T) { + onStoppingCalled := false + + // Create a temporary file and immediately delete it. + tokensFile, err := os.CreateTemp("", "tokens-*") + require.NoError(t, err) + require.NoError(t, os.Remove(tokensFile.Name())) + + // Chain delegates together. + var chain BasicLifecyclerDelegate + chain = &mockDelegate{ + onRegister: func(lifecycler *BasicLifecycler, ringDesc Desc, instanceExists bool, instanceID string, instanceDesc InstanceDesc) (InstanceState, Tokens) { + assert.False(t, instanceExists) + return JOINING, Tokens{1, 2, 3, 4, 5} + }, + onStopping: func(l *BasicLifecycler) { + assert.Equal(t, LEAVING, l.GetState()) + onStoppingCalled = true + }, + } + + chain = NewTokensPersistencyDelegate(tokensFile.Name(), ACTIVE, chain, log.NewNopLogger()) + chain = NewLeaveOnStoppingDelegate(chain, log.NewNopLogger()) + chain = NewAutoForgetDelegate(time.Minute, chain, log.NewNopLogger()) + + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + lifecycler, _, err := prepareBasicLifecyclerWithDelegate(t, cfg, chain) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, lifecycler) //nolint:errcheck + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + assert.Equal(t, JOINING, lifecycler.GetState()) + assert.Equal(t, Tokens{1, 2, 3, 4, 5}, lifecycler.GetTokens()) + assert.True(t, lifecycler.IsRegistered()) + + require.NoError(t, services.StopAndAwaitTerminated(ctx, lifecycler)) + assert.True(t, onStoppingCalled) + + // Ensure tokens have been stored. 
+ actualTokens, err := LoadTokensFromFile(tokensFile.Name()) + require.NoError(t, err) + assert.Equal(t, Tokens{1, 2, 3, 4, 5}, actualTokens) +} + +func TestAutoForgetDelegate(t *testing.T) { + const forgetPeriod = time.Minute + registeredAt := time.Now() + + tests := map[string]struct { + setup func(ringDesc *Desc) + expectedInstances []string + }{ + "no unhealthy instance in the ring": { + setup: func(ringDesc *Desc) { + ringDesc.AddIngester("instance-1", "1.1.1.1", "", nil, ACTIVE, registeredAt) + }, + expectedInstances: []string{testInstanceID, "instance-1"}, + }, + "unhealthy instance in the ring that has NOT reached the forget period yet": { + setup: func(ringDesc *Desc) { + i := ringDesc.AddIngester("instance-1", "1.1.1.1", "", nil, ACTIVE, registeredAt) + i.Timestamp = time.Now().Add(-forgetPeriod).Add(5 * time.Second).Unix() + ringDesc.Ingesters["instance-1"] = i + }, + expectedInstances: []string{testInstanceID, "instance-1"}, + }, + "unhealthy instance in the ring that has reached the forget period": { + setup: func(ringDesc *Desc) { + i := ringDesc.AddIngester("instance-1", "1.1.1.1", "", nil, ACTIVE, registeredAt) + i.Timestamp = time.Now().Add(-forgetPeriod).Add(-5 * time.Second).Unix() + ringDesc.Ingesters["instance-1"] = i + }, + expectedInstances: []string{testInstanceID}, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + cfg.HeartbeatPeriod = 100 * time.Millisecond + + testDelegate := &mockDelegate{} + + autoForgetDelegate := NewAutoForgetDelegate(forgetPeriod, testDelegate, log.NewNopLogger()) + lifecycler, store, err := prepareBasicLifecyclerWithDelegate(t, cfg, autoForgetDelegate) + require.NoError(t, err) + + // Setup the initial state of the ring. + require.NoError(t, store.CAS(ctx, testRingKey, func(in interface{}) (out interface{}, retry bool, err error) { + ringDesc := NewDesc() + testData.setup(ringDesc) + return ringDesc, true, nil + })) + + // Start the lifecycler. + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + defer services.StopAndAwaitTerminated(ctx, lifecycler) //nolint:errcheck + + // Wait until a heartbeat has been sent. + test.Poll(t, time.Second, true, func() interface{} { + return testutil.ToFloat64(lifecycler.metrics.heartbeats) > 0 + }) + + // Read back the ring status from the store.
+ v, err := store.Get(ctx, testRingKey) + require.NoError(t, err) + require.NotNil(t, v) + + var actualInstances []string + for id := range GetOrCreateRingDesc(v).GetIngesters() { + actualInstances = append(actualInstances, id) + } + + assert.ElementsMatch(t, testData.expectedInstances, actualInstances) + }) + } +} diff --git a/vendor/github.com/grafana/dskit/ring/basic_lifecycler_metrics.go b/pkg/ring/basic_lifecycler_metrics.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/basic_lifecycler_metrics.go rename to pkg/ring/basic_lifecycler_metrics.go diff --git a/pkg/ring/basic_lifecycler_test.go b/pkg/ring/basic_lifecycler_test.go new file mode 100644 index 00000000000..d1239329914 --- /dev/null +++ b/pkg/ring/basic_lifecycler_test.go @@ -0,0 +1,506 @@ +package ring + +import ( + "context" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/grafana/dskit/kv" + "github.com/grafana/dskit/kv/consul" + "github.com/grafana/dskit/services" + "github.com/grafana/dskit/test" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + testRingKey = "test" + testRingName = "test" + testInstanceID = "test-id" +) + +func TestBasicLifecycler_RegisterOnStart(t *testing.T) { + tests := map[string]struct { + initialInstanceID string + initialInstanceDesc *InstanceDesc + registerState InstanceState + registerTokens Tokens + }{ + "initial ring is empty": { + registerState: ACTIVE, + registerTokens: Tokens{1, 2, 3, 4, 5}, + }, + "initial ring non empty (containing another instance)": { + initialInstanceID: "instance-1", + initialInstanceDesc: &InstanceDesc{ + Addr: "1.1.1.1", + State: ACTIVE, + Tokens: Tokens{6, 7, 8, 9, 10}, + RegisteredTimestamp: time.Now().Add(-time.Hour).Unix(), + }, + registerState: ACTIVE, + registerTokens: Tokens{1, 2, 3, 4, 5}, + }, + "initial ring contains the same instance with different state, tokens and address (new one is 127.0.0.1)": { + initialInstanceID: testInstanceID, + initialInstanceDesc: &InstanceDesc{ + Addr: "1.1.1.1", + State: ACTIVE, + Tokens: Tokens{6, 7, 8, 9, 10}, + RegisteredTimestamp: time.Now().Add(-time.Hour).Unix(), + }, + registerState: JOINING, + registerTokens: Tokens{1, 2, 3, 4, 5}, + }, + "initial ring contains the same instance with different address (new one is 127.0.0.1)": { + initialInstanceID: testInstanceID, + initialInstanceDesc: &InstanceDesc{ + Addr: "1.1.1.1", + State: ACTIVE, + Tokens: Tokens{1, 2, 3, 4, 5}, + RegisteredTimestamp: time.Now().Add(-time.Hour).Unix(), + }, + registerState: ACTIVE, + registerTokens: Tokens{1, 2, 3, 4, 5}, + }, + "initial ring contains the same instance with registered timestamp == 0": { + initialInstanceID: testInstanceID, + initialInstanceDesc: &InstanceDesc{ + Addr: "1.1.1.1", + State: ACTIVE, + Tokens: Tokens{1, 2, 3, 4, 5}, + RegisteredTimestamp: 0, + }, + registerState: ACTIVE, + registerTokens: Tokens{1, 2, 3, 4, 5}, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + lifecycler, delegate, store, err := prepareBasicLifecycler(t, cfg) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, lifecycler) //nolint:errcheck + + // Add an initial instance to the ring. 
+ if testData.initialInstanceDesc != nil { + require.NoError(t, store.CAS(ctx, testRingKey, func(in interface{}) (out interface{}, retry bool, err error) { + desc := testData.initialInstanceDesc + + ringDesc := GetOrCreateRingDesc(in) + ringDesc.AddIngester(testData.initialInstanceID, desc.Addr, desc.Zone, desc.Tokens, desc.State, desc.GetRegisteredAt()) + return ringDesc, true, nil + })) + } + + // Assert on the lifecycler state once the instance register delegate function will be called. + delegate.onRegister = func(_ *BasicLifecycler, ringDesc Desc, instanceExists bool, instanceID string, instanceDesc InstanceDesc) (InstanceState, Tokens) { + assert.Equal(t, services.Starting, lifecycler.State()) + assert.False(t, lifecycler.IsRegistered()) + assert.Equal(t, testInstanceID, instanceID) + assert.NotNil(t, ringDesc) + + if testData.initialInstanceID == instanceID { + assert.True(t, instanceExists) + assert.Equal(t, testData.initialInstanceDesc.Addr, instanceDesc.Addr) + assert.Equal(t, testData.initialInstanceDesc.Zone, instanceDesc.Zone) + assert.Equal(t, testData.initialInstanceDesc.State, instanceDesc.State) + assert.Equal(t, testData.initialInstanceDesc.Tokens, instanceDesc.Tokens) + assert.Equal(t, testData.initialInstanceDesc.RegisteredTimestamp, instanceDesc.RegisteredTimestamp) + } else { + assert.False(t, instanceExists) + } + + return testData.registerState, testData.registerTokens + } + + assert.Equal(t, testInstanceID, lifecycler.GetInstanceID()) + assert.Equal(t, services.New, lifecycler.State()) + assert.Equal(t, PENDING, lifecycler.GetState()) + assert.Empty(t, lifecycler.GetTokens()) + assert.False(t, lifecycler.IsRegistered()) + assert.Equal(t, float64(0), testutil.ToFloat64(lifecycler.metrics.tokensOwned)) + assert.Equal(t, float64(cfg.NumTokens), testutil.ToFloat64(lifecycler.metrics.tokensToOwn)) + assert.Zero(t, lifecycler.GetRegisteredAt()) + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + + assert.Equal(t, services.Running, lifecycler.State()) + assert.Equal(t, testData.registerState, lifecycler.GetState()) + assert.Equal(t, testData.registerTokens, lifecycler.GetTokens()) + assert.True(t, lifecycler.IsRegistered()) + assert.Equal(t, float64(cfg.NumTokens), testutil.ToFloat64(lifecycler.metrics.tokensOwned)) + assert.Equal(t, float64(cfg.NumTokens), testutil.ToFloat64(lifecycler.metrics.tokensToOwn)) + + // Assert on the instance registered within the ring. + instanceDesc, ok := getInstanceFromStore(t, store, testInstanceID) + assert.True(t, ok) + assert.Equal(t, cfg.Addr, instanceDesc.GetAddr()) + assert.Equal(t, testData.registerState, instanceDesc.GetState()) + assert.Equal(t, testData.registerTokens, Tokens(instanceDesc.GetTokens())) + assert.Equal(t, cfg.Zone, instanceDesc.GetZone()) + + // The expected registered timestamp is "now" if the instance didn't exist in the ring yet + // or the already existing value. 
+ if testData.initialInstanceID == testInstanceID { + assert.Equal(t, testData.initialInstanceDesc.RegisteredTimestamp, instanceDesc.RegisteredTimestamp) + } else { + assert.InDelta(t, time.Now().Unix(), instanceDesc.RegisteredTimestamp, 2) + } + }) + } +} + +func TestBasicLifecycler_UnregisterOnStop(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + lifecycler, delegate, store, err := prepareBasicLifecycler(t, cfg) + require.NoError(t, err) + + delegate.onRegister = func(_ *BasicLifecycler, _ Desc, _ bool, _ string, _ InstanceDesc) (InstanceState, Tokens) { + return ACTIVE, Tokens{1, 2, 3, 4, 5} + } + delegate.onStopping = func(_ *BasicLifecycler) { + assert.Equal(t, services.Stopping, lifecycler.State()) + } + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + assert.Equal(t, ACTIVE, lifecycler.GetState()) + assert.Equal(t, Tokens{1, 2, 3, 4, 5}, lifecycler.GetTokens()) + assert.True(t, lifecycler.IsRegistered()) + assert.NotZero(t, lifecycler.GetRegisteredAt()) + assert.Equal(t, float64(cfg.NumTokens), testutil.ToFloat64(lifecycler.metrics.tokensOwned)) + assert.Equal(t, float64(cfg.NumTokens), testutil.ToFloat64(lifecycler.metrics.tokensToOwn)) + + require.NoError(t, services.StopAndAwaitTerminated(ctx, lifecycler)) + assert.Equal(t, PENDING, lifecycler.GetState()) + assert.Equal(t, Tokens{}, lifecycler.GetTokens()) + assert.False(t, lifecycler.IsRegistered()) + assert.Zero(t, lifecycler.GetRegisteredAt()) + assert.Equal(t, float64(0), testutil.ToFloat64(lifecycler.metrics.tokensOwned)) + assert.Equal(t, float64(0), testutil.ToFloat64(lifecycler.metrics.tokensToOwn)) + + // Assert on the instance removed from the ring. + _, ok := getInstanceFromStore(t, store, testInstanceID) + assert.False(t, ok) +} + +func TestBasicLifecycler_KeepInTheRingOnStop(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + cfg.KeepInstanceInTheRingOnShutdown = true + + lifecycler, delegate, store, err := prepareBasicLifecycler(t, cfg) + require.NoError(t, err) + + delegate.onRegister = func(_ *BasicLifecycler, _ Desc, _ bool, _ string, _ InstanceDesc) (InstanceState, Tokens) { + return ACTIVE, Tokens{1, 2, 3, 4, 5} + } + delegate.onStopping = func(lifecycler *BasicLifecycler) { + require.NoError(t, lifecycler.changeState(context.Background(), LEAVING)) + } + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + assert.Equal(t, ACTIVE, lifecycler.GetState()) + assert.Equal(t, Tokens{1, 2, 3, 4, 5}, lifecycler.GetTokens()) + assert.True(t, lifecycler.IsRegistered()) + assert.NotZero(t, lifecycler.GetRegisteredAt()) + assert.Equal(t, float64(cfg.NumTokens), testutil.ToFloat64(lifecycler.metrics.tokensOwned)) + assert.Equal(t, float64(cfg.NumTokens), testutil.ToFloat64(lifecycler.metrics.tokensToOwn)) + + require.NoError(t, services.StopAndAwaitTerminated(ctx, lifecycler)) + assert.Equal(t, LEAVING, lifecycler.GetState()) + assert.Equal(t, Tokens{1, 2, 3, 4, 5}, lifecycler.GetTokens()) + assert.True(t, lifecycler.IsRegistered()) + assert.NotZero(t, lifecycler.GetRegisteredAt()) + assert.Equal(t, float64(cfg.NumTokens), testutil.ToFloat64(lifecycler.metrics.tokensOwned)) + assert.Equal(t, float64(cfg.NumTokens), testutil.ToFloat64(lifecycler.metrics.tokensToOwn)) + + // Assert on the instance is in the ring. 
+ inst, ok := getInstanceFromStore(t, store, testInstanceID) + assert.True(t, ok) + assert.Equal(t, cfg.Addr, inst.GetAddr()) + assert.Equal(t, LEAVING, inst.GetState()) + assert.Equal(t, Tokens{1, 2, 3, 4, 5}, Tokens(inst.GetTokens())) + assert.Equal(t, cfg.Zone, inst.GetZone()) +} + +func TestBasicLifecycler_HeartbeatWhileRunning(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + cfg.HeartbeatPeriod = 10 * time.Millisecond + + lifecycler, _, store, err := prepareBasicLifecycler(t, cfg) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, lifecycler) //nolint:errcheck + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + + // Get the initial timestamp so that we can then assert on the timestamp updated. + desc, _ := getInstanceFromStore(t, store, testInstanceID) + initialTimestamp := desc.GetTimestamp() + + test.Poll(t, time.Second, true, func() interface{} { + desc, _ := getInstanceFromStore(t, store, testInstanceID) + currTimestamp := desc.GetTimestamp() + + return currTimestamp > initialTimestamp + }) + + assert.Greater(t, testutil.ToFloat64(lifecycler.metrics.heartbeats), float64(0)) +} + +func TestBasicLifecycler_HeartbeatWhileStopping(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + cfg.HeartbeatPeriod = 10 * time.Millisecond + + lifecycler, delegate, store, err := prepareBasicLifecycler(t, cfg) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + + onStoppingCalled := false + + delegate.onStopping = func(_ *BasicLifecycler) { + // Since the heartbeat timestamp is in seconds we would have to wait 1s before we can assert + // on it being changed, regardless of the heartbeat period. To speed up this test, we're going + // to reset the timestamp to 0 and then assert it has been updated. + require.NoError(t, store.CAS(ctx, testRingKey, func(in interface{}) (out interface{}, retry bool, err error) { + ringDesc := GetOrCreateRingDesc(in) + instanceDesc := ringDesc.Ingesters[testInstanceID] + instanceDesc.Timestamp = 0 + ringDesc.Ingesters[testInstanceID] = instanceDesc + return ringDesc, true, nil + })) + + // Wait until the timestamp has been updated. + test.Poll(t, time.Second, true, func() interface{} { + desc, _ := getInstanceFromStore(t, store, testInstanceID) + currTimestamp := desc.GetTimestamp() + + return currTimestamp != 0 + }) + + onStoppingCalled = true + } + + assert.NoError(t, services.StopAndAwaitTerminated(ctx, lifecycler)) + assert.True(t, onStoppingCalled) +} + +func TestBasicLifecycler_HeartbeatAfterBackendReset(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + cfg.HeartbeatPeriod = 10 * time.Millisecond + + lifecycler, delegate, store, err := prepareBasicLifecycler(t, cfg) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, lifecycler) //nolint:errcheck + + registerTokens := Tokens{1, 2, 3, 4, 5} + delegate.onRegister = func(_ *BasicLifecycler, _ Desc, _ bool, _ string, _ InstanceDesc) (state InstanceState, tokens Tokens) { + return ACTIVE, registerTokens + } + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + + // At this point the instance has been registered to the ring. + expectedRegisteredAt := lifecycler.GetRegisteredAt() + + // Now we delete it from the ring to simulate a ring storage reset and we expect the next heartbeat + // will restore it.
+ require.NoError(t, store.CAS(ctx, testRingKey, func(in interface{}) (out interface{}, retry bool, err error) { + return NewDesc(), true, nil + })) + + test.Poll(t, time.Second, true, func() interface{} { + desc, ok := getInstanceFromStore(t, store, testInstanceID) + return ok && + desc.GetTimestamp() > 0 && + desc.GetState() == ACTIVE && + Tokens(desc.GetTokens()).Equals(registerTokens) && + desc.GetAddr() == cfg.Addr && + desc.GetRegisteredAt().Unix() == expectedRegisteredAt.Unix() + }) +} + +func TestBasicLifecycler_ChangeState(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + lifecycler, delegate, store, err := prepareBasicLifecycler(t, cfg) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, lifecycler) //nolint:errcheck + + delegate.onRegister = func(_ *BasicLifecycler, _ Desc, _ bool, _ string, _ InstanceDesc) (InstanceState, Tokens) { + return JOINING, Tokens{1, 2, 3, 4, 5} + } + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + assert.Equal(t, JOINING, lifecycler.GetState()) + + for _, state := range []InstanceState{ACTIVE, LEAVING} { + assert.NoError(t, lifecycler.ChangeState(ctx, state)) + assert.Equal(t, state, lifecycler.GetState()) + + // Assert on the instance state read from the ring. + desc, ok := getInstanceFromStore(t, store, testInstanceID) + assert.True(t, ok) + assert.Equal(t, state, desc.GetState()) + } +} + +func TestBasicLifecycler_TokensObservePeriod(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + cfg.NumTokens = 5 + cfg.TokensObservePeriod = time.Second + + lifecycler, delegate, store, err := prepareBasicLifecycler(t, cfg) + require.NoError(t, err) + + delegate.onRegister = func(_ *BasicLifecycler, _ Desc, _ bool, _ string, _ InstanceDesc) (InstanceState, Tokens) { + return ACTIVE, Tokens{1, 2, 3, 4, 5} + } + + require.NoError(t, lifecycler.StartAsync(ctx)) + + // While the lifecycler is starting we poll the ring. As soon as the instance + // is registered, we remove some tokens to simulate how gossip memberlist + // reconciliation works in case of clashing tokens. + test.Poll(t, time.Second, true, func() interface{} { + // Ensure the instance has been registered in the ring. + desc, ok := getInstanceFromStore(t, store, testInstanceID) + if !ok { + return false + } + + // Remove some tokens. + return store.CAS(ctx, testRingKey, func(in interface{}) (out interface{}, retry bool, err error) { + ringDesc := GetOrCreateRingDesc(in) + ringDesc.AddIngester(testInstanceID, desc.Addr, desc.Zone, Tokens{4, 5}, desc.State, time.Now()) + return ringDesc, true, nil + }) == nil + }) + + require.NoError(t, lifecycler.AwaitRunning(ctx)) + assert.Subset(t, lifecycler.GetTokens(), Tokens{4, 5}) + assert.NotContains(t, lifecycler.GetTokens(), uint32(1)) + assert.NotContains(t, lifecycler.GetTokens(), uint32(2)) + assert.NotContains(t, lifecycler.GetTokens(), uint32(3)) +} + +func TestBasicLifecycler_updateInstance_ShouldAddInstanceToTheRingIfDoesNotExistEvenIfNotChanged(t *testing.T) { + ctx := context.Background() + cfg := prepareBasicLifecyclerConfig() + cfg.HeartbeatPeriod = time.Hour // No heartbeat during the test. 
+ + lifecycler, delegate, store, err := prepareBasicLifecycler(t, cfg) + require.NoError(t, err) + defer services.StopAndAwaitTerminated(ctx, lifecycler) //nolint:errcheck + + registerTokens := Tokens{1, 2, 3, 4, 5} + delegate.onRegister = func(_ *BasicLifecycler, _ Desc, _ bool, _ string, _ InstanceDesc) (state InstanceState, tokens Tokens) { + return ACTIVE, registerTokens + } + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + + // At this point the instance has been registered to the ring. + expectedRegisteredAt := lifecycler.GetRegisteredAt() + + // Now we delete it from the ring to simulate a ring storage reset. + require.NoError(t, store.CAS(ctx, testRingKey, func(in interface{}) (out interface{}, retry bool, err error) { + return NewDesc(), true, nil + })) + + // Run a noop update instance, but since the instance is not in the ring we do expect + // it will be added back anyway. + require.NoError(t, lifecycler.updateInstance(ctx, func(_ *Desc, desc *InstanceDesc) bool { + return false + })) + + desc, ok := getInstanceFromStore(t, store, testInstanceID) + require.True(t, ok) + assert.Equal(t, ACTIVE, desc.GetState()) + assert.Equal(t, registerTokens, Tokens(desc.GetTokens())) + assert.Equal(t, cfg.Addr, desc.GetAddr()) + assert.Equal(t, expectedRegisteredAt.Unix(), desc.RegisteredTimestamp) + assert.Equal(t, expectedRegisteredAt.Unix(), desc.GetRegisteredAt().Unix()) +} + +func prepareBasicLifecyclerConfig() BasicLifecyclerConfig { + return BasicLifecyclerConfig{ + ID: testInstanceID, + Addr: "127.0.0.1:12345", + Zone: "test-zone", + HeartbeatPeriod: time.Minute, + TokensObservePeriod: 0, + NumTokens: 5, + } +} + +func prepareBasicLifecycler(t testing.TB, cfg BasicLifecyclerConfig) (*BasicLifecycler, *mockDelegate, kv.Client, error) { + delegate := &mockDelegate{} + lifecycler, store, err := prepareBasicLifecyclerWithDelegate(t, cfg, delegate) + return lifecycler, delegate, store, err +} + +func prepareBasicLifecyclerWithDelegate(t testing.TB, cfg BasicLifecyclerConfig, delegate BasicLifecyclerDelegate) (*BasicLifecycler, kv.Client, error) { + t.Helper() + + store, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + lifecycler, err := NewBasicLifecycler(cfg, testRingName, testRingKey, store, delegate, log.NewNopLogger(), nil) + return lifecycler, store, err +} + +type mockDelegate struct { + onRegister func(lifecycler *BasicLifecycler, ringDesc Desc, instanceExists bool, instanceID string, instanceDesc InstanceDesc) (InstanceState, Tokens) + onTokensChanged func(lifecycler *BasicLifecycler, tokens Tokens) + onStopping func(lifecycler *BasicLifecycler) + onHeartbeat func(lifecycler *BasicLifecycler, ringDesc *Desc, instanceDesc *InstanceDesc) +} + +func (m *mockDelegate) OnRingInstanceRegister(lifecycler *BasicLifecycler, ringDesc Desc, instanceExists bool, instanceID string, instanceDesc InstanceDesc) (InstanceState, Tokens) { + if m.onRegister == nil { + return PENDING, Tokens{} + } + + return m.onRegister(lifecycler, ringDesc, instanceExists, instanceID, instanceDesc) +} + +func (m *mockDelegate) OnRingInstanceTokens(lifecycler *BasicLifecycler, tokens Tokens) { + if m.onTokensChanged != nil { + m.onTokensChanged(lifecycler, tokens) + } +} + +func (m *mockDelegate) OnRingInstanceStopping(lifecycler *BasicLifecycler) { + if m.onStopping != nil { + m.onStopping(lifecycler) + } +} + +func (m *mockDelegate) OnRingInstanceHeartbeat(lifecycler *BasicLifecycler, ringDesc *Desc,
instanceDesc *InstanceDesc) { + if m.onHeartbeat != nil { + m.onHeartbeat(lifecycler, ringDesc, instanceDesc) + } +} + +func getInstanceFromStore(t *testing.T, store kv.Client, instanceID string) (InstanceDesc, bool) { + out, err := store.Get(context.Background(), testRingKey) + require.NoError(t, err) + + if out == nil { + return InstanceDesc{}, false + } + + ringDesc := out.(*Desc) + instanceDesc, ok := ringDesc.GetIngesters()[instanceID] + + return instanceDesc, ok +} diff --git a/vendor/github.com/grafana/dskit/ring/batch.go b/pkg/ring/batch.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/batch.go rename to pkg/ring/batch.go diff --git a/pkg/ring/bench/ring_memberlist_test.go b/pkg/ring/bench/ring_memberlist_test.go new file mode 100644 index 00000000000..0eb1f5ecbe3 --- /dev/null +++ b/pkg/ring/bench/ring_memberlist_test.go @@ -0,0 +1,109 @@ +package bench + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/grafana/dskit/flagext" + "github.com/grafana/dskit/kv/codec" + "github.com/grafana/dskit/kv/memberlist" + "github.com/grafana/dskit/services" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/require" + + "github.com/cortexproject/cortex/pkg/ring" +) + +type dnsProviderMock struct { + resolved []string +} + +func (p *dnsProviderMock) Resolve(ctx context.Context, addrs []string) error { + p.resolved = addrs + return nil +} + +func (p dnsProviderMock) Addresses() []string { + return p.resolved +} + +func encodeMessage(b *testing.B, key string, d *ring.Desc) []byte { + c := ring.GetCodec() + val, err := c.Encode(d) + require.NoError(b, err) + + kvPair := memberlist.KeyValuePair{ + Key: key, + Value: val, + Codec: c.CodecID(), + } + + ser, err := kvPair.Marshal() + require.NoError(b, err) + return ser +} + +func generateUniqueTokens(ingester, numTokens int) []uint32 { + // Generate unique tokens without using ring.GenerateTokens in order to not + // rely on random number generation. Also, because generating unique tokens + // with GenerateTokens can be quite expensive, it pollutes the CPU profile + // to the point of being useless. + tokens := make([]uint32, numTokens) + for i := range tokens { + tokens[i] = uint32((ingester * 100000) + (i * 10)) + } + return tokens +} + +// Benchmark the memberlist receive path when it is being used as the ring backing store. +func BenchmarkMemberlistReceiveWithRingDesc(b *testing.B) { + c := ring.GetCodec() + + var cfg memberlist.KVConfig + flagext.DefaultValues(&cfg) + cfg.TCPTransport = memberlist.TCPTransportConfig{ + BindAddrs: []string{"localhost"}, + } + cfg.Codecs = []codec.Codec{c} + + mkv := memberlist.NewKV(cfg, log.NewNopLogger(), &dnsProviderMock{}, prometheus.NewPedanticRegistry()) + require.NoError(b, services.StartAndAwaitRunning(context.Background(), mkv)) + defer services.StopAndAwaitTerminated(context.Background(), mkv) //nolint:errcheck + + // Build the initial ring state: + // - The ring isn't actually in use, so the fields such as address/zone are not important. + // - The number of keys in the store has no impact for this test, so simulate a single ring. + // - The number of instances in the ring does have a big impact. 
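+ // (The instance and token counts below are illustrative test values; increasing numInstances is the simplest way to stress a larger ring.)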
+ const numInstances = 600 + const numTokens = 128 + initialDesc := ring.NewDesc() + { + for i := 0; i < numInstances; i++ { + tokens := generateUniqueTokens(i, numTokens) + initialDesc.AddIngester(fmt.Sprintf("instance-%d", i), "127.0.0.1", "zone", tokens, ring.ACTIVE, time.Now()) + } + // Send a single update to populate the store. + msg := encodeMessage(b, "ring", initialDesc) + mkv.NotifyMsg(msg) + } + + // Ensure that each received message updates the ring. + testMsgs := make([][]byte, b.N) + for i := range testMsgs { + instance := initialDesc.Ingesters["instance-0"] + instance.Timestamp = initialDesc.Ingesters["instance-0"].RegisteredTimestamp + int64(i) + + testDesc := ring.NewDesc() + testDesc.Ingesters["instance-0"] = instance + testMsgs[i] = encodeMessage(b, "ring", testDesc) + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + mkv.NotifyMsg(testMsgs[i]) + } +} diff --git a/vendor/github.com/grafana/dskit/ring/client/pool.go b/pkg/ring/client/pool.go similarity index 99% rename from vendor/github.com/grafana/dskit/ring/client/pool.go rename to pkg/ring/client/pool.go index 57b462cc414..cdf0a4bf0a5 100644 --- a/vendor/github.com/grafana/dskit/ring/client/pool.go +++ b/pkg/ring/client/pool.go @@ -9,12 +9,12 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/grafana/dskit/services" "github.com/prometheus/client_golang/prometheus" "github.com/weaveworks/common/user" "google.golang.org/grpc/health/grpc_health_v1" - "github.com/grafana/dskit/ring/util" - "github.com/grafana/dskit/services" + "github.com/cortexproject/cortex/pkg/ring/util" ) // PoolClient is the interface that should be implemented by a diff --git a/pkg/ring/client/pool_test.go b/pkg/ring/client/pool_test.go new file mode 100644 index 00000000000..068d9c896b3 --- /dev/null +++ b/pkg/ring/client/pool_test.go @@ -0,0 +1,146 @@ +package client + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/gogo/status" + "github.com/grafana/dskit/services" + "github.com/stretchr/testify/require" + "google.golang.org/grpc" + "google.golang.org/grpc/codes" + "google.golang.org/grpc/health/grpc_health_v1" +) + +type mockClient struct { + happy bool + status grpc_health_v1.HealthCheckResponse_ServingStatus +} + +func (i mockClient) Check(ctx context.Context, in *grpc_health_v1.HealthCheckRequest, opts ...grpc.CallOption) (*grpc_health_v1.HealthCheckResponse, error) { + if !i.happy { + return nil, fmt.Errorf("Fail") + } + return &grpc_health_v1.HealthCheckResponse{Status: i.status}, nil +} + +func (i mockClient) Close() error { + return nil +} + +func (i mockClient) Watch(ctx context.Context, in *grpc_health_v1.HealthCheckRequest, opts ...grpc.CallOption) (grpc_health_v1.Health_WatchClient, error) { + return nil, status.Error(codes.Unimplemented, "Watching is not supported") +} + +func TestHealthCheck(t *testing.T) { + tcs := []struct { + client mockClient + hasError bool + }{ + {mockClient{happy: true, status: grpc_health_v1.HealthCheckResponse_UNKNOWN}, true}, + {mockClient{happy: true, status: grpc_health_v1.HealthCheckResponse_SERVING}, false}, + {mockClient{happy: true, status: grpc_health_v1.HealthCheckResponse_NOT_SERVING}, true}, + {mockClient{happy: false, status: grpc_health_v1.HealthCheckResponse_UNKNOWN}, true}, + {mockClient{happy: false, status: grpc_health_v1.HealthCheckResponse_SERVING}, true}, + {mockClient{happy: false, status: grpc_health_v1.HealthCheckResponse_NOT_SERVING}, true}, + } + for _, tc := range tcs { + err := healthCheck(tc.client, 
50*time.Millisecond) + hasError := err != nil + if hasError != tc.hasError { + t.Errorf("Expected error: %t, error: %v", tc.hasError, err) + } + } +} + +func TestPoolCache(t *testing.T) { + buildCount := 0 + factory := func(addr string) (PoolClient, error) { + if addr == "bad" { + return nil, fmt.Errorf("Fail") + } + buildCount++ + return mockClient{happy: true, status: grpc_health_v1.HealthCheckResponse_SERVING}, nil + } + + cfg := PoolConfig{ + HealthCheckTimeout: 50 * time.Millisecond, + CheckInterval: 10 * time.Second, + } + + pool := NewPool("test", cfg, nil, factory, nil, log.NewNopLogger()) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), pool)) + defer services.StopAndAwaitTerminated(context.Background(), pool) //nolint:errcheck + + _, err := pool.GetClientFor("1") + require.NoError(t, err) + if buildCount != 1 { + t.Errorf("Did not create client") + } + + _, err = pool.GetClientFor("1") + require.NoError(t, err) + if buildCount != 1 { + t.Errorf("Created client that should have been cached") + } + + _, err = pool.GetClientFor("2") + require.NoError(t, err) + if pool.Count() != 2 { + t.Errorf("Expected Count() = 2, got %d", pool.Count()) + } + + pool.RemoveClientFor("1") + if pool.Count() != 1 { + t.Errorf("Expected Count() = 1, got %d", pool.Count()) + } + + _, err = pool.GetClientFor("1") + require.NoError(t, err) + if buildCount != 3 || pool.Count() != 2 { + t.Errorf("Did not re-create client correctly") + } + + _, err = pool.GetClientFor("bad") + if err == nil { + t.Errorf("Bad create should have thrown an error") + } + if pool.Count() != 2 { + t.Errorf("Bad create should not have been added to cache") + } + + addrs := pool.RegisteredAddresses() + if len(addrs) != pool.Count() { + t.Errorf("Lengths of registered addresses and cache.Count() do not match") + } +} + +func TestCleanUnhealthy(t *testing.T) { + goodAddrs := []string{"good1", "good2"} + badAddrs := []string{"bad1", "bad2"} + clients := map[string]PoolClient{} + for _, addr := range goodAddrs { + clients[addr] = mockClient{happy: true, status: grpc_health_v1.HealthCheckResponse_SERVING} + } + for _, addr := range badAddrs { + clients[addr] = mockClient{happy: false, status: grpc_health_v1.HealthCheckResponse_NOT_SERVING} + } + pool := &Pool{ + clients: clients, + logger: log.NewNopLogger(), + } + pool.cleanUnhealthy() + for _, addr := range badAddrs { + if _, ok := pool.clients[addr]; ok { + t.Errorf("Found bad client after clean: %s\n", addr) + } + } + for _, addr := range goodAddrs { + if _, ok := pool.clients[addr]; !ok { + t.Errorf("Could not find good client after clean: %s\n", addr) + } + } +} diff --git a/vendor/github.com/grafana/dskit/ring/client/ring_service_discovery.go b/pkg/ring/client/ring_service_discovery.go similarity index 91% rename from vendor/github.com/grafana/dskit/ring/client/ring_service_discovery.go rename to pkg/ring/client/ring_service_discovery.go index 2210e754917..797b171c074 100644 --- a/vendor/github.com/grafana/dskit/ring/client/ring_service_discovery.go +++ b/pkg/ring/client/ring_service_discovery.go @@ -3,7 +3,7 @@ package client import ( "errors" - "github.com/grafana/dskit/ring" + "github.com/cortexproject/cortex/pkg/ring" ) func NewRingServiceDiscovery(r ring.ReadRing) PoolServiceDiscovery { diff --git a/pkg/ring/client/ring_service_discovery_test.go b/pkg/ring/client/ring_service_discovery_test.go new file mode 100644 index 00000000000..d161b724239 --- /dev/null +++ b/pkg/ring/client/ring_service_discovery_test.go @@ -0,0 +1,67 @@ +package client + +import 
( + "errors" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/cortexproject/cortex/pkg/ring" +) + +func TestNewRingServiceDiscovery(t *testing.T) { + tests := map[string]struct { + ringReplicationSet ring.ReplicationSet + ringErr error + expectedAddrs []string + expectedErr error + }{ + "discovery failure": { + ringErr: errors.New("mocked error"), + expectedErr: errors.New("mocked error"), + }, + "empty ring": { + ringErr: ring.ErrEmptyRing, + expectedAddrs: nil, + }, + "empty replication set": { + ringReplicationSet: ring.ReplicationSet{ + Instances: []ring.InstanceDesc{}, + }, + expectedAddrs: nil, + }, + "replication containing some endpoints": { + ringReplicationSet: ring.ReplicationSet{ + Instances: []ring.InstanceDesc{ + {Addr: "1.1.1.1"}, + {Addr: "2.2.2.2"}, + }, + }, + expectedAddrs: []string{"1.1.1.1", "2.2.2.2"}, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + r := &mockReadRing{} + r.mockedReplicationSet = testData.ringReplicationSet + r.mockedErr = testData.ringErr + + d := NewRingServiceDiscovery(r) + addrs, err := d() + assert.Equal(t, testData.expectedErr, err) + assert.Equal(t, testData.expectedAddrs, addrs) + }) + } +} + +type mockReadRing struct { + ring.ReadRing + + mockedReplicationSet ring.ReplicationSet + mockedErr error +} + +func (m *mockReadRing) GetAllHealthy(_ ring.Operation) (ring.ReplicationSet, error) { + return m.mockedReplicationSet, m.mockedErr +} diff --git a/vendor/github.com/grafana/dskit/ring/flush.go b/pkg/ring/flush.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/flush.go rename to pkg/ring/flush.go diff --git a/vendor/github.com/grafana/dskit/ring/http.go b/pkg/ring/http.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/http.go rename to pkg/ring/http.go diff --git a/vendor/github.com/grafana/dskit/ring/lifecycler.go b/pkg/ring/lifecycler.go similarity index 99% rename from vendor/github.com/grafana/dskit/ring/lifecycler.go rename to pkg/ring/lifecycler.go index be103e1fbad..6619a237954 100644 --- a/vendor/github.com/grafana/dskit/ring/lifecycler.go +++ b/pkg/ring/lifecycler.go @@ -11,14 +11,13 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/grafana/dskit/flagext" + "github.com/grafana/dskit/kv" + "github.com/grafana/dskit/services" "github.com/pkg/errors" perrors "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "go.uber.org/atomic" - - "github.com/grafana/dskit/flagext" - "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/services" ) // LifecyclerConfig is the config to build a Lifecycler. 
diff --git a/vendor/github.com/grafana/dskit/ring/lifecycler_metrics.go b/pkg/ring/lifecycler_metrics.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/lifecycler_metrics.go rename to pkg/ring/lifecycler_metrics.go diff --git a/pkg/ring/lifecycler_test.go b/pkg/ring/lifecycler_test.go new file mode 100644 index 00000000000..66cc371911c --- /dev/null +++ b/pkg/ring/lifecycler_test.go @@ -0,0 +1,849 @@ +package ring + +import ( + "context" + "fmt" + "sort" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/grafana/dskit/flagext" + "github.com/grafana/dskit/kv/consul" + "github.com/grafana/dskit/services" + "github.com/grafana/dskit/test" + "github.com/pkg/errors" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + // ring key used for testware + ringKey = "ring" +) + +func testLifecyclerConfig(ringConfig Config, id string) LifecyclerConfig { + var lifecyclerConfig LifecyclerConfig + flagext.DefaultValues(&lifecyclerConfig) + lifecyclerConfig.Addr = "0.0.0.0" + lifecyclerConfig.Port = 1 + lifecyclerConfig.ListenPort = 0 + lifecyclerConfig.RingConfig = ringConfig + lifecyclerConfig.NumTokens = 1 + lifecyclerConfig.ID = id + lifecyclerConfig.Zone = "zone1" + lifecyclerConfig.FinalSleep = 0 + lifecyclerConfig.HeartbeatPeriod = 100 * time.Millisecond + + return lifecyclerConfig +} + +func checkNormalised(d interface{}, id string) bool { + desc, ok := d.(*Desc) + return ok && + len(desc.Ingesters) == 1 && + desc.Ingesters[id].State == ACTIVE && + len(desc.Ingesters[id].Tokens) == 1 +} + +func TestLifecycler_HealthyInstancesCount(t *testing.T) { + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + ctx := context.Background() + + // Add the first ingester to the ring + lifecyclerConfig1 := testLifecyclerConfig(ringConfig, "ing1") + lifecyclerConfig1.HeartbeatPeriod = 100 * time.Millisecond + lifecyclerConfig1.JoinAfter = 100 * time.Millisecond + + lifecycler1, err := NewLifecycler(lifecyclerConfig1, &nopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + assert.Equal(t, 0, lifecycler1.HealthyInstancesCount()) + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler1)) + defer services.StopAndAwaitTerminated(ctx, lifecycler1) // nolint:errcheck + + // Assert the first ingester joined the ring + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + return lifecycler1.HealthyInstancesCount() == 1 + }) + + // Add the second ingester to the ring + lifecyclerConfig2 := testLifecyclerConfig(ringConfig, "ing2") + lifecyclerConfig2.HeartbeatPeriod = 100 * time.Millisecond + lifecyclerConfig2.JoinAfter = 100 * time.Millisecond + + lifecycler2, err := NewLifecycler(lifecyclerConfig2, &nopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + assert.Equal(t, 0, lifecycler2.HealthyInstancesCount()) + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler2)) + defer services.StopAndAwaitTerminated(ctx, lifecycler2) // nolint:errcheck + + // Assert the second ingester joined the ring + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + return lifecycler2.HealthyInstancesCount() == 2 + }) + + // Assert the first ingester count is updated + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + 
return lifecycler1.HealthyInstancesCount() == 2 + }) +} + +func TestLifecycler_ZonesCount(t *testing.T) { + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + events := []struct { + zone string + expectedZones int + }{ + {"zone-a", 1}, + {"zone-b", 2}, + {"zone-a", 2}, + {"zone-c", 3}, + } + + for idx, event := range events { + ctx := context.Background() + + // Register an ingester to the ring. + cfg := testLifecyclerConfig(ringConfig, fmt.Sprintf("instance-%d", idx)) + cfg.HeartbeatPeriod = 100 * time.Millisecond + cfg.JoinAfter = 100 * time.Millisecond + cfg.Zone = event.zone + + lifecycler, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + assert.Equal(t, 0, lifecycler.ZonesCount()) + + require.NoError(t, services.StartAndAwaitRunning(ctx, lifecycler)) + defer services.StopAndAwaitTerminated(ctx, lifecycler) // nolint:errcheck + + // Wait until joined. + test.Poll(t, time.Second, idx+1, func() interface{} { + return lifecycler.HealthyInstancesCount() + }) + + assert.Equal(t, event.expectedZones, lifecycler.ZonesCount()) + } +} + +func TestLifecycler_NilFlushTransferer(t *testing.T) { + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + lifecyclerConfig := testLifecyclerConfig(ringConfig, "ing1") + + // Create a lifecycler with nil FlushTransferer to make sure it operates correctly + lifecycler, err := NewLifecycler(lifecyclerConfig, nil, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), lifecycler)) + + // Ensure the lifecycler joined the ring + test.Poll(t, time.Second, 1, func() interface{} { + return lifecycler.HealthyInstancesCount() + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), lifecycler)) + + assert.Equal(t, 0, lifecycler.HealthyInstancesCount()) +} + +func TestLifecycler_TwoRingsWithDifferentKeysOnTheSameKVStore(t *testing.T) { + // Create a shared ring + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + // Create two lifecyclers, each on a separate ring + lifecyclerConfig1 := testLifecyclerConfig(ringConfig, "instance-1") + lifecyclerConfig2 := testLifecyclerConfig(ringConfig, "instance-2") + + lifecycler1, err := NewLifecycler(lifecyclerConfig1, nil, "service-1", "ring-1", true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), lifecycler1)) + defer services.StopAndAwaitTerminated(context.Background(), lifecycler1) //nolint:errcheck + + lifecycler2, err := NewLifecycler(lifecyclerConfig2, nil, "service-2", "ring-2", true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), lifecycler2)) + defer services.StopAndAwaitTerminated(context.Background(), lifecycler2) //nolint:errcheck + + // Ensure each lifecycler reports 1 healthy instance, because 
they're + // in a different ring + test.Poll(t, time.Second, 1, func() interface{} { + return lifecycler1.HealthyInstancesCount() + }) + + test.Poll(t, time.Second, 1, func() interface{} { + return lifecycler2.HealthyInstancesCount() + }) +} + +type nopFlushTransferer struct{} + +func (f *nopFlushTransferer) Flush() {} +func (f *nopFlushTransferer) TransferOut(_ context.Context) error { + return nil +} + +func TestLifecycler_ShouldHandleInstanceAbruptlyRestarted(t *testing.T) { + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + r, err := New(ringConfig, "ingester", ringKey, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), r)) + defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck + + // Add an 'ingester' with normalised tokens. + lifecyclerConfig1 := testLifecyclerConfig(ringConfig, "ing1") + l1, err := NewLifecycler(lifecyclerConfig1, &nopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), l1)) + + // Check this ingester joined, is active, and has one token. + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + d, err := r.KVClient.Get(context.Background(), ringKey) + require.NoError(t, err) + return checkNormalised(d, "ing1") + }) + + expectedTokens := l1.getTokens() + expectedRegisteredAt := l1.getRegisteredAt() + + // Wait 1 second because the registered timestamp has second precision. Without waiting + // we wouldn't have the guarantee the previous registered timestamp is preserved. + time.Sleep(time.Second) + + // Add a second ingester with the same settings, so it will think it has restarted + l2, err := NewLifecycler(lifecyclerConfig1, &nopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), l2)) + + // Check the new ingester picked up the same tokens and registered timestamp. 
+ test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + d, err := r.KVClient.Get(context.Background(), ringKey) + require.NoError(t, err) + + return checkNormalised(d, "ing1") && + expectedTokens.Equals(l2.getTokens()) && + expectedRegisteredAt.Unix() == l2.getRegisteredAt().Unix() + }) +} + +type MockClient struct { + ListFunc func(ctx context.Context, prefix string) ([]string, error) + GetFunc func(ctx context.Context, key string) (interface{}, error) + DeleteFunc func(ctx context.Context, key string) error + CASFunc func(ctx context.Context, key string, f func(in interface{}) (out interface{}, retry bool, err error)) error + WatchKeyFunc func(ctx context.Context, key string, f func(interface{}) bool) + WatchPrefixFunc func(ctx context.Context, prefix string, f func(string, interface{}) bool) +} + +func (m *MockClient) List(ctx context.Context, prefix string) ([]string, error) { + if m.ListFunc != nil { + return m.ListFunc(ctx, prefix) + } + + return nil, nil +} + +func (m *MockClient) Get(ctx context.Context, key string) (interface{}, error) { + if m.GetFunc != nil { + return m.GetFunc(ctx, key) + } + + return nil, nil +} + +func (m *MockClient) Delete(ctx context.Context, key string) error { + if m.DeleteFunc != nil { + return m.DeleteFunc(ctx, key) + } + + return nil +} + +func (m *MockClient) CAS(ctx context.Context, key string, f func(in interface{}) (out interface{}, retry bool, err error)) error { + if m.CASFunc != nil { + return m.CASFunc(ctx, key, f) + } + + return nil +} + +func (m *MockClient) WatchKey(ctx context.Context, key string, f func(interface{}) bool) { + if m.WatchKeyFunc != nil { + m.WatchKeyFunc(ctx, key, f) + } +} + +func (m *MockClient) WatchPrefix(ctx context.Context, prefix string, f func(string, interface{}) bool) { + if m.WatchPrefixFunc != nil { + m.WatchPrefixFunc(ctx, prefix, f) + } +} + +// Ensure a check ready returns error when consul returns a nil key and the ingester already holds keys. This happens if the ring key gets deleted +func TestCheckReady_NoRingInKVStore(t *testing.T) { + ctx := context.Background() + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = &MockClient{} + + r, err := New(ringConfig, "ingester", ringKey, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, r.StartAsync(ctx)) + // This is very atypical, but if we used AwaitRunning, that would fail, because of how quickly service terminates ... + // by the time we check for Running state, it is already terminated, because mock ring has no WatchFunc, so it + // will just exit. 
+ require.NoError(t, r.AwaitTerminated(ctx)) + + cfg := testLifecyclerConfig(ringConfig, "ring1") + cfg.MinReadyDuration = 1 * time.Nanosecond + l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, l1)) + t.Cleanup(func() { + require.NoError(t, services.StopAndAwaitTerminated(ctx, l1)) + }) + + l1.setTokens([]uint32{1}) + + err = l1.CheckReady(context.Background()) + require.Error(t, err) + assert.Contains(t, err.Error(), "no ring returned from the KV store") +} + +func TestCheckReady_MinReadyDuration(t *testing.T) { + tests := map[string]struct { + minReadyDuration time.Duration + expectedMinDelay time.Duration + }{ + "should immediately pass the check if the instance is ACTIVE and healthy and min ready duration is disabled": { + minReadyDuration: 0, + expectedMinDelay: 0, + }, + "should wait min ready duration before passing the check after the instance is ACTIVE and healthy": { + minReadyDuration: time.Second, + expectedMinDelay: time.Second, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + ctx := context.Background() + + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + cfg := testLifecyclerConfig(ringConfig, "instance-1") + cfg.ReadinessCheckRingHealth = false + cfg.MinReadyDuration = testData.minReadyDuration + + l, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ring", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, l)) + t.Cleanup(func() { + require.NoError(t, services.StopAndAwaitTerminated(ctx, l)) + }) + + startTime := time.Now() + + // Wait until the instance is ACTIVE and healthy in the ring. + waitRingInstance(t, 3*time.Second, l, func(instance InstanceDesc) error { + return instance.IsReady(time.Now(), cfg.RingConfig.HeartbeatTimeout) + }) + + if testData.expectedMinDelay == 0 { + // We expect it to be immediately ready. + assert.NoError(t, l.CheckReady(ctx)) + } else { + // Poll the readiness check until ready and measure how much time it takes. 
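+ // (Measuring from startTime lets the assertions below verify the MinReadyDuration gate.)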
+ test.Poll(t, 3*time.Second, nil, func() interface{} { + return l.CheckReady(ctx) + }) + + assert.GreaterOrEqual(t, time.Since(startTime), testData.expectedMinDelay) + } + }) + } +} + +func TestCheckReady_CheckRingHealth(t *testing.T) { + tests := map[string]struct { + checkRingHealthEnabled bool + firstJoinAfter time.Duration + secondJoinAfter time.Duration + expectedFirstMinReady time.Duration + expectedFirstMaxReady time.Duration + }{ + "should wait until the self instance is ACTIVE and healthy in the ring when 'check ring health' is disabled": { + checkRingHealthEnabled: false, + firstJoinAfter: time.Second, + secondJoinAfter: 3 * time.Second, + expectedFirstMinReady: time.Second, + expectedFirstMaxReady: 2 * time.Second, + }, + "should wait until all instances are ACTIVE and healthy in the ring when 'check ring health' is enabled": { + checkRingHealthEnabled: true, + firstJoinAfter: time.Second, + secondJoinAfter: 3 * time.Second, + expectedFirstMinReady: 3 * time.Second, + expectedFirstMaxReady: 4 * time.Second, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + ctx := context.Background() + + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + // Create lifecycler #1. + cfg := testLifecyclerConfig(ringConfig, "instance-1") + cfg.ReadinessCheckRingHealth = testData.checkRingHealthEnabled + cfg.MinReadyDuration = 0 + cfg.JoinAfter = testData.firstJoinAfter + + l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ring", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, l1)) + t.Cleanup(func() { + require.NoError(t, services.StopAndAwaitTerminated(ctx, l1)) + }) + + // Create lifecycler #2. + cfg = testLifecyclerConfig(ringConfig, "instance-2") + cfg.ReadinessCheckRingHealth = testData.checkRingHealthEnabled + cfg.MinReadyDuration = 0 + cfg.JoinAfter = testData.secondJoinAfter + + l2, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ring", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(ctx, l2)) + t.Cleanup(func() { + require.NoError(t, services.StopAndAwaitTerminated(ctx, l2)) + }) + + startTime := time.Now() + + // Wait until both instances are registered in the ring. We expect them to be registered + // immediately and then switch to ACTIVE after the configured auto join delay. + waitRingInstance(t, 3*time.Second, l1, func(instance InstanceDesc) error { return nil }) + waitRingInstance(t, 3*time.Second, l2, func(instance InstanceDesc) error { return nil }) + + // Poll the readiness check until ready and measure how much time it takes. 
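+ // (Measuring from startTime lets the assertions below verify whether readiness also waited for the other instance to become ACTIVE.)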
+ test.Poll(t, 5*time.Second, nil, func() interface{} { + return l1.CheckReady(ctx) + }) + + assert.GreaterOrEqual(t, time.Since(startTime), testData.expectedFirstMinReady) + assert.LessOrEqual(t, time.Since(startTime), testData.expectedFirstMaxReady) + }) + } +} + +type noopFlushTransferer struct { +} + +func (f *noopFlushTransferer) Flush() {} +func (f *noopFlushTransferer) TransferOut(ctx context.Context) error { return nil } + +func TestRestartIngester_DisabledHeartbeat_unregister_on_shutdown_false(t *testing.T) { + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + r, err := New(ringConfig, "ingester", ringKey, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), r)) + + // poll waits for the given condition and returns the actual state of the ingesters once the condition succeeds. + poll := func(condition func(*Desc) bool) map[string]InstanceDesc { + var ingesters map[string]InstanceDesc + test.Poll(t, 5*time.Second, true, func() interface{} { + d, err := r.KVClient.Get(context.Background(), ringKey) + require.NoError(t, err) + + desc, ok := d.(*Desc) + + if ok { + ingesters = desc.Ingesters + } + return ok && condition(desc) + }) + + return ingesters + } + + // Starts an ingester and waits for it to become active. + startIngesterAndWaitActive := func(ingId string) *Lifecycler { + lifecyclerConfig := testLifecyclerConfig(ringConfig, ingId) + // Disable heartbeat and unregister_on_shutdown. + lifecyclerConfig.UnregisterOnShutdown = false + lifecyclerConfig.HeartbeatPeriod = 0 + lifecycler, err := NewLifecycler(lifecyclerConfig, &noopFlushTransferer{}, "lifecycler", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), lifecycler)) + poll(func(desc *Desc) bool { + return desc.Ingesters[ingId].State == ACTIVE + }) + return lifecycler + } + + // We create 2 fake ingesters with heartbeat disabled and `unregister_on_shutdown=false`, then + // test whether ingester 2 becomes active again after: + // * A clean shutdown (LEAVING after shutdown) + // * Crashing while in the PENDING or JOINING state + l1 := startIngesterAndWaitActive("ing1") + defer services.StopAndAwaitTerminated(context.Background(), l1) //nolint:errcheck + + l2 := startIngesterAndWaitActive("ing2") + + ingesters := poll(func(desc *Desc) bool { + return len(desc.Ingesters) == 2 && desc.Ingesters["ing1"].State == ACTIVE && desc.Ingesters["ing2"].State == ACTIVE + }) + + // Both ingesters should be active and running. + assert.Equal(t, ACTIVE, ingesters["ing1"].State) + assert.Equal(t, ACTIVE, ingesters["ing2"].State) + + // Stopping one ingester gracefully should leave it in the LEAVING state in the ring. + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2)) + + ingesters = poll(func(desc *Desc) bool { + return len(desc.Ingesters) == 2 && desc.Ingesters["ing2"].State == LEAVING + }) + assert.Equal(t, LEAVING, ingesters["ing2"].State) + + // Start ingester2 again - it should flip back to ACTIVE in the ring. + l2 = startIngesterAndWaitActive("ing2") + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2)) + + // Simulate ingester2 crashing on startup and leaving the ring in the JOINING state. + err = r.KVClient.CAS(context.Background(), ringKey, func(in interface{}) 
(out interface{}, retry bool, err error) { + desc, ok := in.(*Desc) + require.Equal(t, true, ok) + ingester2Desc := desc.Ingesters["ing2"] + ingester2Desc.State = JOINING + desc.Ingesters["ing2"] = ingester2Desc + return desc, true, nil + }) + require.NoError(t, err) + + l2 = startIngesterAndWaitActive("ing2") + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2)) + + // Simulate ingester2 crashing on startup and leaving the ring in the PENDING state. + err = r.KVClient.CAS(context.Background(), ringKey, func(in interface{}) (out interface{}, retry bool, err error) { + desc, ok := in.(*Desc) + require.Equal(t, true, ok) + ingester2Desc := desc.Ingesters["ing2"] + ingester2Desc.State = PENDING + desc.Ingesters["ing2"] = ingester2Desc + return desc, true, nil + }) + require.NoError(t, err) + + l2 = startIngesterAndWaitActive("ing2") + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l2)) +} + +func TestTokensOnDisk(t *testing.T) { + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + r, err := New(ringConfig, "ingester", ringKey, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), r)) + defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck + + tokenDir := t.TempDir() + + lifecyclerConfig := testLifecyclerConfig(ringConfig, "ing1") + lifecyclerConfig.NumTokens = 512 + lifecyclerConfig.TokensFilePath = tokenDir + "/tokens" + + // Start first ingester. + l1, err := NewLifecycler(lifecyclerConfig, &noopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), l1)) + + // Check this ingester joined, is active, and has 512 tokens. + var expTokens []uint32 + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + d, err := r.KVClient.Get(context.Background(), ringKey) + require.NoError(t, err) + + desc, ok := d.(*Desc) + if ok { + expTokens = desc.Ingesters["ing1"].Tokens + } + return ok && + len(desc.Ingesters) == 1 && + desc.Ingesters["ing1"].State == ACTIVE && + len(desc.Ingesters["ing1"].Tokens) == 512 + }) + + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), l1)) + + // Start a new ingester using the same token directory. + lifecyclerConfig.ID = "ing2" + l2, err := NewLifecycler(lifecyclerConfig, &noopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), l2)) + defer services.StopAndAwaitTerminated(context.Background(), l2) //nolint:errcheck + + // Check this ingester joined, is active, and has 512 tokens. + var actTokens []uint32 + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + d, err := r.KVClient.Get(context.Background(), ringKey) + require.NoError(t, err) + desc, ok := d.(*Desc) + if ok { + actTokens = desc.Ingesters["ing2"].Tokens + } + return ok && + len(desc.Ingesters) == 1 && + desc.Ingesters["ing2"].State == ACTIVE && + len(desc.Ingesters["ing2"].Tokens) == 512 + }) + + // Check for same tokens. 
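+ // (Sort both slices so the equality check below does not depend on token ordering.)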
+ sort.Slice(expTokens, func(i, j int) bool { return expTokens[i] < expTokens[j] }) + sort.Slice(actTokens, func(i, j int) bool { return actTokens[i] < actTokens[j] }) + for i := 0; i < 512; i++ { + require.Equal(t, expTokens, actTokens) + } +} + +// JoinInLeavingState ensures that if the lifecycler starts up and the ring already has it in a LEAVING state that it still is able to auto join +func TestJoinInLeavingState(t *testing.T) { + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + r, err := New(ringConfig, "ingester", ringKey, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), r)) + defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck + + cfg := testLifecyclerConfig(ringConfig, "ing1") + cfg.NumTokens = 2 + cfg.MinReadyDuration = 1 * time.Nanosecond + + // Set state as LEAVING + err = r.KVClient.CAS(context.Background(), ringKey, func(in interface{}) (interface{}, bool, error) { + r := &Desc{ + Ingesters: map[string]InstanceDesc{ + "ing1": { + State: LEAVING, + Tokens: []uint32{1, 4}, + }, + "ing2": { + Tokens: []uint32{2, 3}, + }, + }, + } + + return r, true, nil + }) + require.NoError(t, err) + + l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), l1)) + + // Check that the lifecycler was able to join after coming up in LEAVING + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + d, err := r.KVClient.Get(context.Background(), ringKey) + require.NoError(t, err) + + desc, ok := d.(*Desc) + return ok && + len(desc.Ingesters) == 2 && + desc.Ingesters["ing1"].State == ACTIVE && + len(desc.Ingesters["ing1"].Tokens) == cfg.NumTokens && + len(desc.Ingesters["ing2"].Tokens) == 2 + }) +} + +// JoinInJoiningState ensures that if the lifecycler starts up and the ring already has it in a JOINING state that it still is able to auto join +func TestJoinInJoiningState(t *testing.T) { + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + r, err := New(ringConfig, "ingester", ringKey, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), r)) + defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck + + cfg := testLifecyclerConfig(ringConfig, "ing1") + cfg.NumTokens = 2 + cfg.MinReadyDuration = 1 * time.Nanosecond + instance1RegisteredAt := time.Now().Add(-1 * time.Hour) + instance2RegisteredAt := time.Now().Add(-2 * time.Hour) + + // Set state as JOINING + err = r.KVClient.CAS(context.Background(), ringKey, func(in interface{}) (interface{}, bool, error) { + r := &Desc{ + Ingesters: map[string]InstanceDesc{ + "ing1": { + State: JOINING, + Tokens: []uint32{1, 4}, + RegisteredTimestamp: instance1RegisteredAt.Unix(), + }, + "ing2": { + Tokens: []uint32{2, 3}, + RegisteredTimestamp: instance2RegisteredAt.Unix(), + }, + }, + } + + return r, true, nil + }) + require.NoError(t, err) + + l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ingester", ringKey, true, 
log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), l1)) + + // Check that the lifecycler was able to join after coming up in JOINING + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + d, err := r.KVClient.Get(context.Background(), ringKey) + require.NoError(t, err) + + desc, ok := d.(*Desc) + return ok && + len(desc.Ingesters) == 2 && + desc.Ingesters["ing1"].State == ACTIVE && + len(desc.Ingesters["ing1"].Tokens) == cfg.NumTokens && + len(desc.Ingesters["ing2"].Tokens) == 2 && + desc.Ingesters["ing1"].RegisteredTimestamp == instance1RegisteredAt.Unix() && + desc.Ingesters["ing2"].RegisteredTimestamp == instance2RegisteredAt.Unix() + }) +} + +func TestRestoreOfZoneWhenOverwritten(t *testing.T) { + // This test simulates an upgrade from pre-1.0 Cortex where + // older ingesters do not have the zone field in their ring structs, + // so it gets removed. The current version of the lifecycler should + // write it back on its next heartbeat update. + + ringStore, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + var ringConfig Config + flagext.DefaultValues(&ringConfig) + ringConfig.KVStore.Mock = ringStore + + r, err := New(ringConfig, "ingester", ringKey, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), r)) + defer services.StopAndAwaitTerminated(context.Background(), r) //nolint:errcheck + + cfg := testLifecyclerConfig(ringConfig, "ing1") + + // Set ing1 to not have a zone + err = r.KVClient.CAS(context.Background(), ringKey, func(in interface{}) (interface{}, bool, error) { + r := &Desc{ + Ingesters: map[string]InstanceDesc{ + "ing1": { + State: ACTIVE, + Addr: "0.0.0.0", + Tokens: []uint32{1, 4}, + }, + "ing2": { + Tokens: []uint32{2, 3}, + }, + }, + } + + return r, true, nil + }) + require.NoError(t, err) + + l1, err := NewLifecycler(cfg, &nopFlushTransferer{}, "ingester", ringKey, true, log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), l1)) + + // Check that the lifecycler was able to reset the zone value to the expected setting + test.Poll(t, 1000*time.Millisecond, true, func() interface{} { + d, err := r.KVClient.Get(context.Background(), ringKey) + require.NoError(t, err) + desc, ok := d.(*Desc) + return ok && + len(desc.Ingesters) == 2 && + desc.Ingesters["ing1"].Zone == l1.Zone && + desc.Ingesters["ing2"].Zone == "" + + }) +} + +func waitRingInstance(t *testing.T, timeout time.Duration, l *Lifecycler, check func(instance InstanceDesc) error) { + test.Poll(t, timeout, nil, func() interface{} { + desc, err := l.KVStore.Get(context.Background(), l.RingKey) + if err != nil { + return err + } + + ringDesc, ok := desc.(*Desc) + if !ok || ringDesc == nil { + return errors.New("empty ring") + } + + instance, ok := ringDesc.Ingesters[l.ID] + if !ok { + return errors.New("no instance in the ring") + } + + return check(instance) + }) +} diff --git a/pkg/ring/merge_test.go b/pkg/ring/merge_test.go new file mode 100644 index 00000000000..f9f745dd5e2 --- /dev/null +++ b/pkg/ring/merge_test.go @@ -0,0 +1,495 @@ +package ring + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestNormalizationAndConflictResolution(t *testing.T) { + now := time.Now().Unix() + + first := &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: 
"addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{50, 40, 40, 30}}, + "Ing 2": {Addr: "addr2", Timestamp: 123456, State: LEAVING, Tokens: []uint32{100, 5, 5, 100, 100, 200, 20, 10}}, + "Ing 3": {Addr: "addr3", Timestamp: now, State: LEFT, Tokens: []uint32{100, 200, 300}}, + "Ing 4": {Addr: "addr4", Timestamp: now, State: LEAVING, Tokens: []uint32{30, 40, 50}}, + "Unknown": {Tokens: []uint32{100}}, + }, + } + + second := &Desc{ + Ingesters: map[string]InstanceDesc{ + "Unknown": { + Timestamp: now + 10, + Tokens: []uint32{1000, 2000}, + }, + }, + } + + change, err := first.Merge(second, false) + if err != nil { + t.Fatal(err) + } + changeRing := (*Desc)(nil) + if change != nil { + changeRing = change.(*Desc) + } + + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: 123456, State: LEAVING, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now, State: LEFT}, + "Ing 4": {Addr: "addr4", Timestamp: now, State: LEAVING}, + "Unknown": {Timestamp: now + 10, Tokens: []uint32{1000, 2000}}, + }, + }, first) + + assert.Equal(t, &Desc{ + // change ring is always normalized, "Unknown" ingester has lost two tokens: 100 from first ring (because of second ring), and 1000 (conflict resolution) + Ingesters: map[string]InstanceDesc{ + "Unknown": {Timestamp: now + 10, Tokens: []uint32{1000, 2000}}, + }, + }, changeRing) +} + +func merge(ring1, ring2 *Desc) (*Desc, *Desc) { + change, err := ring1.Merge(ring2, false) + if err != nil { + panic(err) + } + + if change == nil { + return ring1, nil + } + + changeRing := change.(*Desc) + return ring1, changeRing +} + +func TestMerge(t *testing.T) { + now := time.Now().Unix() + + firstRing := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now, State: JOINING, Tokens: []uint32{5, 10, 20, 100, 200}}, + }, + } + } + + secondRing := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 3": {Addr: "addr3", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{150, 250, 350}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + }, + } + } + + thirdRing := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: LEAVING, Tokens: []uint32{30, 40, 50}}, + "Ing 3": {Addr: "addr3", Timestamp: now + 10, State: ACTIVE, Tokens: []uint32{150, 250, 350}}, + }, + } + } + + expectedFirstSecondMerge := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{150, 250, 350}}, + }, + } + } + + expectedFirstSecondThirdMerge := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: LEAVING, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now + 10, State: ACTIVE, Tokens: []uint32{150, 250, 350}}, + }, + } + } + + fourthRing := func() *Desc { + return &Desc{ + Ingesters: 
map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: LEFT, Tokens: []uint32{30, 40, 50}}, + }, + } + } + + expectedFirstSecondThirdFourthMerge := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: LEFT, Tokens: nil}, + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now + 10, State: ACTIVE, Tokens: []uint32{150, 250, 350}}, + }, + } + } + + { + our, ch := merge(firstRing(), secondRing()) + assert.Equal(t, expectedFirstSecondMerge(), our) + assert.Equal(t, secondRing(), ch) // entire second ring is new + } + + { // idempotency: (no change after applying same ring again) + our, ch := merge(expectedFirstSecondMerge(), secondRing()) + assert.Equal(t, expectedFirstSecondMerge(), our) + assert.Equal(t, (*Desc)(nil), ch) + } + + { // commutativity: Merge(first, second) == Merge(second, first) + our, ch := merge(secondRing(), firstRing()) + assert.Equal(t, expectedFirstSecondMerge(), our) + // when merging first into second ring, only "Ing 1" is new + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + }, + }, ch) + } + + { // associativity: Merge(Merge(first, second), third) == Merge(first, Merge(second, third)) + our1, _ := merge(firstRing(), secondRing()) + our1, _ = merge(our1, thirdRing()) + assert.Equal(t, expectedFirstSecondThirdMerge(), our1) + + our2, _ := merge(secondRing(), thirdRing()) + our2, _ = merge(our2, firstRing()) + assert.Equal(t, expectedFirstSecondThirdMerge(), our2) + } + + { + out, ch := merge(expectedFirstSecondThirdMerge(), fourthRing()) + assert.Equal(t, expectedFirstSecondThirdFourthMerge(), out) + // entire fourth ring is the update -- but without tokens + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: LEFT, Tokens: nil}, + }, + }, ch) + } +} + +func TestTokensTakeover(t *testing.T) { + now := time.Now().Unix() + + first := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now, State: JOINING, Tokens: []uint32{5, 10, 20}}, // partially migrated from Ing 3 + }, + } + } + + second := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20}}, + "Ing 3": {Addr: "addr3", Timestamp: now + 5, State: LEAVING, Tokens: []uint32{5, 10, 20, 100, 200}}, + }, + } + } + + merged := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20}}, + "Ing 3": {Addr: "addr3", Timestamp: now + 5, State: LEAVING, Tokens: []uint32{100, 200}}, + }, + } + } + + { + our, ch := merge(first(), second()) + assert.Equal(t, merged(), our) + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20}}, + "Ing 3": {Addr: "addr3", Timestamp: now + 5, State: LEAVING, Tokens: []uint32{100, 200}}, // change doesn't contain conflicted tokens + }, + }, ch) + } + + { // idempotency: (no change after applying same ring again) + our, ch := 
merge(merged(), second()) + assert.Equal(t, merged(), our) + assert.Equal(t, (*Desc)(nil), ch) + } + + { // commutativity: (Merge(first, second) == Merge(second, first) + our, ch := merge(second(), first()) + assert.Equal(t, merged(), our) + + // change is different though + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + }, + }, ch) + } +} + +func TestMergeLeft(t *testing.T) { + now := time.Now().Unix() + + firstRing := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now, State: JOINING, Tokens: []uint32{5, 10, 20, 100, 200}}, + }, + } + } + + // Not normalised because it contains duplicate and unsorted tokens. + firstRingNotNormalised := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now, State: JOINING, Tokens: []uint32{20, 10, 5, 10, 20, 100, 200, 100}}, + }, + } + } + + secondRing := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 2": {Addr: "addr2", Timestamp: now, State: LEFT}, + }, + } + } + + // Not normalised because it contains a LEFT ingester with tokens. + secondRingNotNormalised := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 2": {Addr: "addr2", Timestamp: now, State: LEFT, Tokens: []uint32{5, 10, 20, 100, 200}}, + }, + } + } + + expectedFirstSecondMerge := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now, State: LEFT}, + }, + } + } + + thirdRing := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: LEAVING, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now, State: JOINING, Tokens: []uint32{5, 10, 20, 100, 200}}, // from firstRing + }, + } + } + + expectedFirstSecondThirdMerge := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: LEAVING, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now, State: LEFT}, + }, + } + } + + { + our, ch := merge(firstRing(), secondRing()) + assert.Equal(t, expectedFirstSecondMerge(), our) + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 2": {Addr: "addr2", Timestamp: now, State: LEFT}, + }, + }, ch) + } + { + // Should yield same result when RHS is not normalised. 
+ our, ch := merge(firstRing(), secondRingNotNormalised()) + assert.Equal(t, expectedFirstSecondMerge(), our) + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 2": {Addr: "addr2", Timestamp: now, State: LEFT}, + }, + }, ch) + + } + + { // idempotency: (no change after applying same ring again) + our, ch := merge(expectedFirstSecondMerge(), secondRing()) + assert.Equal(t, expectedFirstSecondMerge(), our) + assert.Equal(t, (*Desc)(nil), ch) + } + + { // commutativity: Merge(first, second) == Merge(second, first) + our, ch := merge(secondRing(), firstRing()) + assert.Equal(t, expectedFirstSecondMerge(), our) + // when merging first into second ring, only "Ing 1" is new + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + }, + }, ch) + } + { + // Should yield same result when RHS is not normalised. + our, ch := merge(secondRing(), firstRingNotNormalised()) + assert.Equal(t, expectedFirstSecondMerge(), our) + // when merging first into second ring, only "Ing 1" is new + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + }, + }, ch) + + } + + { // associativity: Merge(Merge(first, second), third) == Merge(first, Merge(second, third)) + our1, _ := merge(firstRing(), secondRing()) + our1, _ = merge(our1, thirdRing()) + assert.Equal(t, expectedFirstSecondThirdMerge(), our1) + + our2, _ := merge(secondRing(), thirdRing()) + our2, _ = merge(our2, firstRing()) + assert.Equal(t, expectedFirstSecondThirdMerge(), our2) + } +} + +func TestMergeRemoveMissing(t *testing.T) { + now := time.Now().Unix() + + firstRing := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now, State: JOINING, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now, State: LEAVING, Tokens: []uint32{5, 10, 20, 100, 200}}, + }, + } + } + + secondRing := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + }, + } + } + + expectedFirstSecondMerge := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now + 3, State: LEFT}, // When deleting, time depends on value passed to merge function. + }, + } + } + + { + our, ch := mergeLocalCAS(firstRing(), secondRing(), now+3) + assert.Equal(t, expectedFirstSecondMerge(), our) + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now + 3, State: LEFT}, // When deleting, time depends on value passed to merge function. 
+ }, + }, ch) // entire second ring is new + } + + { // idempotency: (no change after applying same ring again, even if time has advanced) + our, ch := mergeLocalCAS(expectedFirstSecondMerge(), secondRing(), now+10) + assert.Equal(t, expectedFirstSecondMerge(), our) + assert.Equal(t, (*Desc)(nil), ch) + } + + { // commutativity is broken when deleting missing entries. But let's make sure we get reasonable results at least. + our, ch := mergeLocalCAS(secondRing(), firstRing(), now+3) + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now, State: LEAVING}, + }, + }, our) + + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 3": {Addr: "addr3", Timestamp: now, State: LEAVING}, + }, + }, ch) + } +} + +func TestMergeMissingIntoLeft(t *testing.T) { + now := time.Now().Unix() + + ring1 := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 5, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now, State: LEFT}, + }, + } + } + + ring2 := func() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 10, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + }, + } + } + + { + our, ch := mergeLocalCAS(ring1(), ring2(), now+10) + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 10, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + "Ing 3": {Addr: "addr3", Timestamp: now, State: LEFT}, + }, + }, our) + + assert.Equal(t, &Desc{ + Ingesters: map[string]InstanceDesc{ + "Ing 1": {Addr: "addr1", Timestamp: now + 10, State: ACTIVE, Tokens: []uint32{30, 40, 50}}, + "Ing 2": {Addr: "addr2", Timestamp: now + 10, State: ACTIVE, Tokens: []uint32{5, 10, 20, 100, 200}}, + // Ing 3 is not changed, it was already LEFT + }, + }, ch) + } +} + +func mergeLocalCAS(ring1, ring2 *Desc, nowUnixTime int64) (*Desc, *Desc) { + change, err := ring1.mergeWithTime(ring2, true, time.Unix(nowUnixTime, 0)) + if err != nil { + panic(err) + } + + if change == nil { + return ring1, nil + } + + changeRing := change.(*Desc) + return ring1, changeRing +} diff --git a/vendor/github.com/grafana/dskit/ring/model.go b/pkg/ring/model.go similarity index 99% rename from vendor/github.com/grafana/dskit/ring/model.go rename to pkg/ring/model.go index 4166d9e6f8e..75c5f0588f8 100644 --- a/vendor/github.com/grafana/dskit/ring/model.go +++ b/pkg/ring/model.go @@ -8,7 +8,6 @@ import ( "time" "github.com/gogo/protobuf/proto" - "github.com/grafana/dskit/kv/codec" "github.com/grafana/dskit/kv/memberlist" ) diff --git a/pkg/ring/model_test.go b/pkg/ring/model_test.go new file mode 100644 index 00000000000..ae39b1ba031 --- /dev/null +++ b/pkg/ring/model_test.go @@ -0,0 +1,418 @@ +package ring + +import ( + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestInstanceDesc_IsHealthy_ForIngesterOperations(t *testing.T) { + t.Parallel() + + tests := map[string]struct { + ingester *InstanceDesc + 
timeout time.Duration + writeExpected bool + readExpected bool + reportExpected bool + }{ + "ACTIVE ingester with last keepalive newer than timeout": { + ingester: &InstanceDesc{State: ACTIVE, Timestamp: time.Now().Add(-30 * time.Second).Unix()}, + timeout: time.Minute, + writeExpected: true, + readExpected: true, + reportExpected: true, + }, + "ACTIVE ingester with last keepalive older than timeout": { + ingester: &InstanceDesc{State: ACTIVE, Timestamp: time.Now().Add(-90 * time.Second).Unix()}, + timeout: time.Minute, + writeExpected: false, + readExpected: false, + reportExpected: false, + }, + "JOINING ingester with last keepalive newer than timeout": { + ingester: &InstanceDesc{State: JOINING, Timestamp: time.Now().Add(-30 * time.Second).Unix()}, + timeout: time.Minute, + writeExpected: false, + readExpected: false, + reportExpected: true, + }, + "LEAVING ingester with last keepalive newer than timeout": { + ingester: &InstanceDesc{State: LEAVING, Timestamp: time.Now().Add(-30 * time.Second).Unix()}, + timeout: time.Minute, + writeExpected: false, + readExpected: true, + reportExpected: true, + }, + } + + for testName, testData := range tests { + testData := testData + + t.Run(testName, func(t *testing.T) { + actual := testData.ingester.IsHealthy(Write, testData.timeout, time.Now()) + assert.Equal(t, testData.writeExpected, actual) + + actual = testData.ingester.IsHealthy(Read, testData.timeout, time.Now()) + assert.Equal(t, testData.readExpected, actual) + + actual = testData.ingester.IsHealthy(Reporting, testData.timeout, time.Now()) + assert.Equal(t, testData.reportExpected, actual) + }) + } +} + +func TestInstanceDesc_GetRegisteredAt(t *testing.T) { + tests := map[string]struct { + desc *InstanceDesc + expected time.Time + }{ + "should return zero value on nil desc": { + desc: nil, + expected: time.Time{}, + }, + "should return zero value registered timestamp = 0": { + desc: &InstanceDesc{ + RegisteredTimestamp: 0, + }, + expected: time.Time{}, + }, + "should return timestamp parsed from desc": { + desc: &InstanceDesc{ + RegisteredTimestamp: time.Unix(10000000, 0).Unix(), + }, + expected: time.Unix(10000000, 0), + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + assert.True(t, testData.desc.GetRegisteredAt().Equal(testData.expected)) + }) + } +} + +func normalizedSource() *Desc { + r := NewDesc() + r.Ingesters["first"] = InstanceDesc{ + Tokens: []uint32{100, 200, 300}, + } + r.Ingesters["second"] = InstanceDesc{} + return r +} + +func normalizedOutput() *Desc { + return &Desc{ + Ingesters: map[string]InstanceDesc{ + "first": {}, + "second": {Tokens: []uint32{100, 200, 300}}, + }, + } +} + +func TestClaimTokensFromNormalizedToNormalized(t *testing.T) { + r := normalizedSource() + result := r.ClaimTokens("first", "second") + + assert.Equal(t, Tokens{100, 200, 300}, result) + assert.Equal(t, normalizedOutput(), r) +} + +func TestDesc_Ready(t *testing.T) { + now := time.Now() + + r := &Desc{ + Ingesters: map[string]InstanceDesc{ + "ing1": { + Tokens: []uint32{100, 200, 300}, + State: ACTIVE, + Timestamp: now.Unix(), + }, + }, + } + + if err := r.IsReady(now, 10*time.Second); err != nil { + t.Fatal("expected ready, got", err) + } + + if err := r.IsReady(now, 0); err != nil { + t.Fatal("expected ready, got", err) + } + + if err := r.IsReady(now.Add(5*time.Minute), 10*time.Second); err == nil { + t.Fatal("expected !ready (no heartbeat from active ingester), but got no error") + } + + if err := r.IsReady(now.Add(5*time.Minute), 0); err != nil { + 
t.Fatal("expected ready (no heartbeat but timeout disabled), got", err) + } + + r = &Desc{ + Ingesters: map[string]InstanceDesc{ + "ing1": { + State: ACTIVE, + Timestamp: now.Unix(), + }, + }, + } + + if err := r.IsReady(now, 10*time.Second); err == nil { + t.Fatal("expected !ready (no tokens), but got no error") + } + + r.Ingesters["some ingester"] = InstanceDesc{ + Tokens: []uint32{12345}, + Timestamp: now.Unix(), + } + + if err := r.IsReady(now, 10*time.Second); err != nil { + t.Fatal("expected ready, got", err) + } +} + +func TestDesc_getTokensByZone(t *testing.T) { + tests := map[string]struct { + desc *Desc + expected map[string][]uint32 + }{ + "empty ring": { + desc: &Desc{Ingesters: map[string]InstanceDesc{}}, + expected: map[string][]uint32{}, + }, + "single zone": { + desc: &Desc{Ingesters: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Tokens: []uint32{1, 5}, Zone: ""}, + "instance-2": {Addr: "127.0.0.1", Tokens: []uint32{2, 4}, Zone: ""}, + "instance-3": {Addr: "127.0.0.1", Tokens: []uint32{3, 6}, Zone: ""}, + }}, + expected: map[string][]uint32{ + "": {1, 2, 3, 4, 5, 6}, + }, + }, + "multiple zones": { + desc: &Desc{Ingesters: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Tokens: []uint32{1, 5}, Zone: "zone-1"}, + "instance-2": {Addr: "127.0.0.1", Tokens: []uint32{2, 4}, Zone: "zone-1"}, + "instance-3": {Addr: "127.0.0.1", Tokens: []uint32{3, 6}, Zone: "zone-2"}, + }}, + expected: map[string][]uint32{ + "zone-1": {1, 2, 4, 5}, + "zone-2": {3, 6}, + }, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + assert.Equal(t, testData.expected, testData.desc.getTokensByZone()) + }) + } +} + +func TestDesc_TokensFor(t *testing.T) { + tests := map[string]struct { + desc *Desc + expectedMine Tokens + expectedAll Tokens + }{ + "empty ring": { + desc: &Desc{Ingesters: map[string]InstanceDesc{}}, + expectedMine: Tokens(nil), + expectedAll: Tokens{}, + }, + "single zone": { + desc: &Desc{Ingesters: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Tokens: []uint32{1, 5}, Zone: ""}, + "instance-2": {Addr: "127.0.0.1", Tokens: []uint32{2, 4}, Zone: ""}, + "instance-3": {Addr: "127.0.0.1", Tokens: []uint32{3, 6}, Zone: ""}, + }}, + expectedMine: Tokens{1, 5}, + expectedAll: Tokens{1, 2, 3, 4, 5, 6}, + }, + "multiple zones": { + desc: &Desc{Ingesters: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Tokens: []uint32{1, 5}, Zone: "zone-1"}, + "instance-2": {Addr: "127.0.0.1", Tokens: []uint32{2, 4}, Zone: "zone-1"}, + "instance-3": {Addr: "127.0.0.1", Tokens: []uint32{3, 6}, Zone: "zone-2"}, + }}, + expectedMine: Tokens{1, 5}, + expectedAll: Tokens{1, 2, 3, 4, 5, 6}, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + actualMine, actualAll := testData.desc.TokensFor("instance-1") + assert.Equal(t, testData.expectedMine, actualMine) + assert.Equal(t, testData.expectedAll, actualAll) + }) + } +} + +func TestDesc_RingsCompare(t *testing.T) { + tests := map[string]struct { + r1, r2 *Desc + expected CompareResult + }{ + "nil rings": { + r1: nil, + r2: nil, + expected: Equal, + }, + "one nil, one empty ring": { + r1: nil, + r2: &Desc{Ingesters: map[string]InstanceDesc{}}, + expected: Equal, + }, + "two empty rings": { + r1: &Desc{Ingesters: map[string]InstanceDesc{}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{}}, + expected: Equal, + }, + "same single instance": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1"}}}, + r2: &Desc{Ingesters: 
map[string]InstanceDesc{"ing1": {Addr: "addr1"}}}, + expected: Equal, + }, + "same single instance, different timestamp": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Timestamp: 123456}}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Timestamp: 789012}}}, + expected: EqualButStatesAndTimestamps, + }, + "same single instance, different state": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: ACTIVE}}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: JOINING}}}, + expected: EqualButStatesAndTimestamps, + }, + "same single instance, different registered timestamp": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: ACTIVE, RegisteredTimestamp: 1}}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", State: ACTIVE, RegisteredTimestamp: 2}}}, + expected: Different, + }, + "instance in different zone": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Zone: "one"}}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Zone: "two"}}}, + expected: Different, + }, + "same instance, different address": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1"}}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr2"}}}, + expected: Different, + }, + "more instances in one ring": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1"}, "ing2": {Addr: "ing2"}}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1"}}}, + expected: Different, + }, + "different tokens": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Tokens: []uint32{1, 2, 3}}}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1"}}}, + expected: Different, + }, + "different tokens 2": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Tokens: []uint32{1, 2, 3}}}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Tokens: []uint32{1, 2, 4}}}}, + expected: Different, + }, + "same number of instances, using different IDs": { + r1: &Desc{Ingesters: map[string]InstanceDesc{"ing1": {Addr: "addr1", Tokens: []uint32{1, 2, 3}}}}, + r2: &Desc{Ingesters: map[string]InstanceDesc{"ing2": {Addr: "addr1", Tokens: []uint32{1, 2, 3}}}}, + expected: Different, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + assert.Equal(t, testData.expected, testData.r1.RingCompare(testData.r2)) + assert.Equal(t, testData.expected, testData.r2.RingCompare(testData.r1)) + }) + } +} + +func TestMergeTokens(t *testing.T) { + tests := map[string]struct { + input [][]uint32 + expected []uint32 + }{ + "empty input": { + input: nil, + expected: []uint32{}, + }, + "single instance in input": { + input: [][]uint32{ + {1, 3, 4, 8}, + }, + expected: []uint32{1, 3, 4, 8}, + }, + "multiple instances in input": { + input: [][]uint32{ + {1, 3, 4, 8}, + {0, 2, 6, 9}, + {5, 7, 10, 11}, + }, + expected: []uint32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + }, + "some instances have no tokens": { + input: [][]uint32{ + {1, 3, 4, 8}, + {}, + {0, 2, 6, 9}, + {}, + {5, 7, 10, 11}, + }, + expected: []uint32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11}, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + assert.Equal(t, testData.expected, MergeTokens(testData.input)) + }) + } +} + +func TestMergeTokensByZone(t *testing.T) { + tests := map[string]struct { + input 
map[string][][]uint32 + expected map[string][]uint32 + }{ + "empty input": { + input: nil, + expected: map[string][]uint32{}, + }, + "single zone": { + input: map[string][][]uint32{ + "zone-1": { + {1, 3, 4, 8}, + {2, 5, 6, 7}, + }, + }, + expected: map[string][]uint32{ + "zone-1": {1, 2, 3, 4, 5, 6, 7, 8}, + }, + }, + "multiple zones": { + input: map[string][][]uint32{ + "zone-1": { + {1, 3, 4, 8}, + {2, 5, 6, 7}, + }, + "zone-2": { + {3, 5}, + {2, 4}, + }, + }, + expected: map[string][]uint32{ + "zone-1": {1, 2, 3, 4, 5, 6, 7, 8}, + "zone-2": {2, 3, 4, 5}, + }, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + assert.Equal(t, testData.expected, MergeTokensByZone(testData.input)) + }) + } +} diff --git a/vendor/github.com/grafana/dskit/ring/replication_set.go b/pkg/ring/replication_set.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/replication_set.go rename to pkg/ring/replication_set.go diff --git a/pkg/ring/replication_set_test.go b/pkg/ring/replication_set_test.go new file mode 100644 index 00000000000..42ecc0f1122 --- /dev/null +++ b/pkg/ring/replication_set_test.go @@ -0,0 +1,311 @@ +package ring + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/atomic" +) + +func TestReplicationSet_GetAddresses(t *testing.T) { + tests := map[string]struct { + rs ReplicationSet + expected []string + }{ + "should return an empty slice on empty replication set": { + rs: ReplicationSet{}, + expected: []string{}, + }, + "should return instances addresses (no order guaranteed)": { + rs: ReplicationSet{ + Instances: []InstanceDesc{ + {Addr: "127.0.0.1"}, + {Addr: "127.0.0.2"}, + {Addr: "127.0.0.3"}, + }, + }, + expected: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3"}, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + assert.ElementsMatch(t, testData.expected, testData.rs.GetAddresses()) + }) + } +} + +func TestReplicationSet_GetAddressesWithout(t *testing.T) { + tests := map[string]struct { + rs ReplicationSet + expected []string + exclude string + }{ + "should return an empty slice on empty replication set": { + rs: ReplicationSet{}, + expected: []string{}, + exclude: "127.0.0.1", + }, + "non-matching exclusion, should return all addresses": { + rs: ReplicationSet{ + Instances: []InstanceDesc{ + {Addr: "127.0.0.1"}, + {Addr: "127.0.0.2"}, + {Addr: "127.0.0.3"}, + }, + }, + expected: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3"}, + exclude: "127.0.0.4", + }, + "matching exclusion, should return non-excluded addresses": { + rs: ReplicationSet{ + Instances: []InstanceDesc{ + {Addr: "127.0.0.1"}, + {Addr: "127.0.0.2"}, + {Addr: "127.0.0.3"}, + }, + }, + expected: []string{"127.0.0.1", "127.0.0.3"}, + exclude: "127.0.0.2", + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + assert.ElementsMatch(t, testData.expected, testData.rs.GetAddressesWithout(testData.exclude)) + }) + } +} + +var ( + errFailure = errors.New("failed") + errZoneFailure = errors.New("zone failed") +) + +// Return a function that fails starting from failAfter times +func failingFunctionAfter(failAfter int32, delay time.Duration) func(context.Context, *InstanceDesc) (interface{}, error) { + count := atomic.NewInt32(0) + return func(context.Context, *InstanceDesc) (interface{}, error) { + time.Sleep(delay) + if count.Inc() > failAfter { + return nil, errFailure + } + return 1, nil + } +} 
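+ // failingFunctionOnZones (below) fails every call for an instance whose zone is in the given list, and succeeds otherwise.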
+ +func failingFunctionOnZones(zones ...string) func(context.Context, *InstanceDesc) (interface{}, error) { + return func(ctx context.Context, ing *InstanceDesc) (interface{}, error) { + for _, zone := range zones { + if ing.Zone == zone { + return nil, errZoneFailure + } + } + return 1, nil + } +} + +func TestReplicationSet_Do(t *testing.T) { + tests := []struct { + name string + instances []InstanceDesc + maxErrors int + maxUnavailableZones int + f func(context.Context, *InstanceDesc) (interface{}, error) + delay time.Duration + cancelContextDelay time.Duration + want []interface{} + expectedError error + }{ + { + name: "max errors = 0, no errors no delay", + instances: []InstanceDesc{ + {}, + }, + f: func(c context.Context, id *InstanceDesc) (interface{}, error) { + return 1, nil + }, + want: []interface{}{1}, + }, + { + name: "max errors = 0, should fail on 1 error out of 1 instance", + instances: []InstanceDesc{{}}, + f: func(c context.Context, id *InstanceDesc) (interface{}, error) { + return nil, errFailure + }, + want: nil, + expectedError: errFailure, + }, + { + name: "max errors = 0, should fail on 1 error out of 3 instances (last call fails)", + instances: []InstanceDesc{{}, {}, {}}, + f: failingFunctionAfter(2, 10*time.Millisecond), + want: nil, + expectedError: errFailure, + }, + { + name: "max errors = 1, should fail on 3 errors out of 5 instances (last calls fail)", + instances: []InstanceDesc{{}, {}, {}, {}, {}}, + maxErrors: 1, + f: failingFunctionAfter(2, 10*time.Millisecond), + delay: 100 * time.Millisecond, + want: nil, + expectedError: errFailure, + }, + { + name: "max errors = 1, should handle context canceled", + instances: []InstanceDesc{{}, {}, {}}, + maxErrors: 1, + f: func(c context.Context, id *InstanceDesc) (interface{}, error) { + time.Sleep(300 * time.Millisecond) + return 1, nil + }, + cancelContextDelay: 100 * time.Millisecond, + want: nil, + expectedError: context.Canceled, + }, + { + name: "max errors = 0, should succeed on all successful instances", + instances: []InstanceDesc{{Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone3"}}, + f: func(c context.Context, id *InstanceDesc) (interface{}, error) { + return 1, nil + }, + want: []interface{}{1, 1, 1}, + }, + { + name: "max unavailable zones = 1, should succeed on instances failing in 1 out of 3 zones (3 instances)", + instances: []InstanceDesc{{Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone3"}}, + f: failingFunctionOnZones("zone1"), + maxUnavailableZones: 1, + want: []interface{}{1, 1}, + }, + { + name: "max unavailable zones = 1, should fail on instances failing in 2 out of 3 zones (3 instances)", + instances: []InstanceDesc{{Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone3"}}, + f: failingFunctionOnZones("zone1", "zone2"), + maxUnavailableZones: 1, + expectedError: errZoneFailure, + }, + { + name: "max unavailable zones = 1, should succeed on instances failing in 1 out of 3 zones (6 instances)", + instances: []InstanceDesc{{Zone: "zone1"}, {Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone2"}, {Zone: "zone3"}, {Zone: "zone3"}}, + f: failingFunctionOnZones("zone1"), + maxUnavailableZones: 1, + want: []interface{}{1, 1, 1, 1}, + }, + { + name: "max unavailable zones = 2, should fail on instances failing in 3 out of 5 zones (5 instances)", + instances: []InstanceDesc{{Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone3"}, {Zone: "zone4"}, {Zone: "zone5"}}, + f: failingFunctionOnZones("zone1", "zone2", "zone3"), + maxUnavailableZones: 2, + expectedError: errZoneFailure, + }, + { + name: "max unavailable zones = 2, should 
succeed on instances failing in 2 out of 5 zones (10 instances)", + instances: []InstanceDesc{{Zone: "zone1"}, {Zone: "zone1"}, {Zone: "zone2"}, {Zone: "zone2"}, {Zone: "zone3"}, {Zone: "zone3"}, {Zone: "zone4"}, {Zone: "zone4"}, {Zone: "zone5"}, {Zone: "zone5"}}, + f: failingFunctionOnZones("zone1", "zone5"), + maxUnavailableZones: 2, + want: []interface{}{1, 1, 1, 1, 1, 1}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Ensure the test case has been correctly setup (max errors and max unavailable zones are + // mutually exclusive). + require.False(t, tt.maxErrors > 0 && tt.maxUnavailableZones > 0) + + r := ReplicationSet{ + Instances: tt.instances, + MaxErrors: tt.maxErrors, + MaxUnavailableZones: tt.maxUnavailableZones, + } + ctx := context.Background() + if tt.cancelContextDelay > 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithCancel(ctx) + time.AfterFunc(tt.cancelContextDelay, func() { + cancel() + }) + } + got, err := r.Do(ctx, tt.delay, tt.f) + if tt.expectedError != nil { + assert.Equal(t, tt.expectedError, err) + } else { + assert.NoError(t, err) + } + assert.Equal(t, tt.want, got) + }) + } +} + +var ( + replicationSetChangesInitialState = ReplicationSet{ + Instances: []InstanceDesc{ + {Addr: "127.0.0.1"}, + {Addr: "127.0.0.2"}, + {Addr: "127.0.0.3"}, + }, + } + replicationSetChangesTestCases = map[string]struct { + nextState ReplicationSet + expectHasReplicationSetChanged bool + expectHasReplicationSetChangedWithoutState bool + }{ + "timestamp changed": { + ReplicationSet{ + Instances: []InstanceDesc{ + {Addr: "127.0.0.1", Timestamp: time.Hour.Microseconds()}, + {Addr: "127.0.0.2"}, + {Addr: "127.0.0.3"}, + }, + }, + false, + false, + }, + "state changed": { + ReplicationSet{ + Instances: []InstanceDesc{ + {Addr: "127.0.0.1", State: PENDING}, + {Addr: "127.0.0.2"}, + {Addr: "127.0.0.3"}, + }, + }, + true, + false, + }, + "more instances": { + ReplicationSet{ + Instances: []InstanceDesc{ + {Addr: "127.0.0.1"}, + {Addr: "127.0.0.2"}, + {Addr: "127.0.0.3"}, + {Addr: "127.0.0.4"}, + }, + }, + true, + true, + }, + } +) + +func TestHasReplicationSetChanged_IgnoresTimeStamp(t *testing.T) { + // Only testing difference to underlying Equal function + for testName, testData := range replicationSetChangesTestCases { + t.Run(testName, func(t *testing.T) { + assert.Equal(t, testData.expectHasReplicationSetChanged, HasReplicationSetChanged(replicationSetChangesInitialState, testData.nextState), "HasReplicationSetChanged wrong result") + }) + } +} + +func TestHasReplicationSetChangedWithoutState_IgnoresTimeStampAndState(t *testing.T) { + // Only testing difference to underlying Equal function + for testName, testData := range replicationSetChangesTestCases { + t.Run(testName, func(t *testing.T) { + assert.Equal(t, testData.expectHasReplicationSetChangedWithoutState, HasReplicationSetChangedWithoutState(replicationSetChangesInitialState, testData.nextState), "HasReplicationSetChangedWithoutState wrong result") + }) + } +} diff --git a/vendor/github.com/grafana/dskit/ring/replication_set_tracker.go b/pkg/ring/replication_set_tracker.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/replication_set_tracker.go rename to pkg/ring/replication_set_tracker.go diff --git a/pkg/ring/replication_set_tracker_test.go b/pkg/ring/replication_set_tracker_test.go new file mode 100644 index 00000000000..f24d23c00a2 --- /dev/null +++ b/pkg/ring/replication_set_tracker_test.go @@ -0,0 +1,266 @@ +package ring + +import ( + "errors" + 
"testing" + + "github.com/stretchr/testify/assert" +) + +func TestDefaultResultTracker(t *testing.T) { + instance1 := InstanceDesc{Addr: "127.0.0.1"} + instance2 := InstanceDesc{Addr: "127.0.0.2"} + instance3 := InstanceDesc{Addr: "127.0.0.3"} + instance4 := InstanceDesc{Addr: "127.0.0.4"} + + tests := map[string]struct { + instances []InstanceDesc + maxErrors int + run func(t *testing.T, tracker *defaultResultTracker) + }{ + "should succeed on no instances to track": { + instances: nil, + maxErrors: 0, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed once all instances succeed on max errors = 0": { + instances: []InstanceDesc{instance1, instance2, instance3, instance4}, + maxErrors: 0, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance3, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance4, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should fail on 1st failing instance on max errors = 0": { + instances: []InstanceDesc{instance1, instance2, instance3, instance4}, + maxErrors: 0, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.True(t, tracker.failed()) + }, + }, + "should fail on 2nd failing instance on max errors = 1": { + instances: []InstanceDesc{instance1, instance2, instance3, instance4}, + maxErrors: 1, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance3, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.True(t, tracker.failed()) + }, + }, + "should fail on 3rd failing instance on max errors = 2": { + instances: []InstanceDesc{instance1, instance2, instance3, instance4}, + maxErrors: 2, + run: func(t *testing.T, tracker *defaultResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance3, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance4, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.True(t, tracker.failed()) + }, + }, + } + + for testName, testCase := range tests { + t.Run(testName, func(t *testing.T) { + testCase.run(t, newDefaultResultTracker(testCase.instances, testCase.maxErrors)) + }) + } +} + +func 
TestZoneAwareResultTracker(t *testing.T) { + instance1 := InstanceDesc{Addr: "127.0.0.1", Zone: "zone-a"} + instance2 := InstanceDesc{Addr: "127.0.0.2", Zone: "zone-a"} + instance3 := InstanceDesc{Addr: "127.0.0.3", Zone: "zone-b"} + instance4 := InstanceDesc{Addr: "127.0.0.4", Zone: "zone-b"} + instance5 := InstanceDesc{Addr: "127.0.0.5", Zone: "zone-c"} + instance6 := InstanceDesc{Addr: "127.0.0.6", Zone: "zone-c"} + + tests := map[string]struct { + instances []InstanceDesc + maxUnavailableZones int + run func(t *testing.T, tracker *zoneAwareResultTracker) + }{ + "should succeed on no instances to track": { + instances: nil, + maxUnavailableZones: 0, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed once all instances succeed on max unavailable zones = 0": { + instances: []InstanceDesc{instance1, instance2, instance3}, + maxUnavailableZones: 0, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance3, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should fail on 1st failing instance on max unavailable zones = 0": { + instances: []InstanceDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 0, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance2, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.True(t, tracker.failed()) + }, + }, + "should succeed on 2 failing instances within the same zone on max unavailable zones = 1": { + instances: []InstanceDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 1, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + // Track failing instances. + for _, instance := range []InstanceDesc{instance1, instance2} { + tracker.done(&instance, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + } + + // Track successful instances. + for _, instance := range []InstanceDesc{instance3, instance4, instance5} { + tracker.done(&instance, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + } + + tracker.done(&instance6, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed as soon as the response has been successfully received from 'all zones - 1' on max unavailable zones = 1": { + instances: []InstanceDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 1, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + // Track successful instances. 
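+ // With 3 zones and maxUnavailableZones = 1, the tracker succeeds once all instances in any 2 zones have responded.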
+ for _, instance := range []InstanceDesc{instance1, instance2, instance3} { + tracker.done(&instance, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + } + + tracker.done(&instance4, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed on failing instances within 2 zones on max unavailable zones = 2": { + instances: []InstanceDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 2, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + // Track failing instances. + for _, instance := range []InstanceDesc{instance1, instance2, instance3, instance4} { + tracker.done(&instance, errors.New("test")) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + } + + // Track successful instances. + tracker.done(&instance5, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + tracker.done(&instance6, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + "should succeed as soon as the response has been successfully received from 'all zones - 2' on max unavailable zones = 2": { + instances: []InstanceDesc{instance1, instance2, instance3, instance4, instance5, instance6}, + maxUnavailableZones: 2, + run: func(t *testing.T, tracker *zoneAwareResultTracker) { + // Zone-a + tracker.done(&instance1, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + // Zone-b + tracker.done(&instance3, nil) + assert.False(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + + // Zone-a + tracker.done(&instance2, nil) + assert.True(t, tracker.succeeded()) + assert.False(t, tracker.failed()) + }, + }, + } + + for testName, testCase := range tests { + t.Run(testName, func(t *testing.T) { + testCase.run(t, newZoneAwareResultTracker(testCase.instances, testCase.maxUnavailableZones)) + }) + } +} diff --git a/vendor/github.com/grafana/dskit/ring/replication_strategy.go b/pkg/ring/replication_strategy.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/replication_strategy.go rename to pkg/ring/replication_strategy.go diff --git a/pkg/ring/replication_strategy_test.go b/pkg/ring/replication_strategy_test.go new file mode 100644 index 00000000000..1fe5d0e9187 --- /dev/null +++ b/pkg/ring/replication_strategy_test.go @@ -0,0 +1,165 @@ +package ring + +import ( + "fmt" + "testing" + "time" + + "github.com/stretchr/testify/assert" +) + +func TestRingReplicationStrategy(t *testing.T) { + for i, tc := range []struct { + replicationFactor, liveIngesters, deadIngesters int + expectedMaxFailure int + expectedError string + }{ + // Ensure it works for a single ingester, for local testing. + { + replicationFactor: 1, + liveIngesters: 1, + expectedMaxFailure: 0, + }, + + { + replicationFactor: 1, + deadIngesters: 1, + expectedError: "at least 1 live replicas required, could only find 0 - unhealthy instances: dead1", + }, + + // Ensure it works for RF=3 and 2 ingesters. + { + replicationFactor: 3, + liveIngesters: 2, + expectedMaxFailure: 0, + }, + + // Ensure it works for the default production config. 
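+ // RF = 3 with 3 healthy ingesters tolerates a single failure while still reaching a quorum of 2 replicas.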
+ { + replicationFactor: 3, + liveIngesters: 3, + expectedMaxFailure: 1, + }, + + { + replicationFactor: 3, + liveIngesters: 2, + deadIngesters: 1, + expectedMaxFailure: 0, + }, + + { + replicationFactor: 3, + liveIngesters: 1, + deadIngesters: 2, + expectedError: "at least 2 live replicas required, could only find 1 - unhealthy instances: dead1,dead2", + }, + + // Ensure it works when adding / removing nodes. + + // A node is joining or leaving, replica set expands. + { + replicationFactor: 3, + liveIngesters: 4, + expectedMaxFailure: 1, + }, + + { + replicationFactor: 3, + liveIngesters: 3, + deadIngesters: 1, + expectedMaxFailure: 0, + }, + + { + replicationFactor: 3, + liveIngesters: 2, + deadIngesters: 2, + expectedError: "at least 3 live replicas required, could only find 2 - unhealthy instances: dead1,dead2", + }, + } { + ingesters := []InstanceDesc{} + for i := 0; i < tc.liveIngesters; i++ { + ingesters = append(ingesters, InstanceDesc{ + Timestamp: time.Now().Unix(), + }) + } + for i := 0; i < tc.deadIngesters; i++ { + ingesters = append(ingesters, InstanceDesc{Addr: fmt.Sprintf("dead%d", i+1)}) + } + + t.Run(fmt.Sprintf("[%d]", i), func(t *testing.T) { + strategy := NewDefaultReplicationStrategy() + liveIngesters, maxFailure, err := strategy.Filter(ingesters, Read, tc.replicationFactor, 100*time.Second, false) + if tc.expectedError == "" { + assert.NoError(t, err) + assert.Equal(t, tc.liveIngesters, len(liveIngesters)) + assert.Equal(t, tc.expectedMaxFailure, maxFailure) + } else { + assert.EqualError(t, err, tc.expectedError) + } + }) + } +} + +func TestIgnoreUnhealthyInstancesReplicationStrategy(t *testing.T) { + for _, tc := range []struct { + name string + liveIngesters, deadIngesters int + expectedMaxFailure int + expectedError string + }{ + { + name: "with at least 1 healthy instance", + liveIngesters: 1, + expectedMaxFailure: 0, + }, + { + name: "with more healthy instances than unhealthy", + deadIngesters: 1, + liveIngesters: 2, + expectedMaxFailure: 1, + }, + { + name: "with more unhealthy instances than healthy", + deadIngesters: 1, + liveIngesters: 2, + expectedMaxFailure: 1, + }, + { + name: "with equal number of healthy and unhealthy instances", + deadIngesters: 2, + liveIngesters: 2, + expectedMaxFailure: 1, + }, + { + name: "with no healthy instances", + liveIngesters: 0, + deadIngesters: 3, + expectedMaxFailure: 0, + expectedError: "at least 1 healthy replica required, could only find 0 - unhealthy instances: dead1,dead2,dead3", + }, + } { + ingesters := []InstanceDesc{} + for i := 0; i < tc.liveIngesters; i++ { + ingesters = append(ingesters, InstanceDesc{ + Timestamp: time.Now().Unix(), + }) + } + for i := 0; i < tc.deadIngesters; i++ { + ingesters = append(ingesters, InstanceDesc{Addr: fmt.Sprintf("dead%d", i+1)}) + } + + t.Run(tc.name, func(t *testing.T) { + strategy := NewIgnoreUnhealthyInstancesReplicationStrategy() + liveIngesters, maxFailure, err := strategy.Filter(ingesters, Read, 3, 100*time.Second, false) + if tc.expectedError == "" { + assert.NoError(t, err) + assert.Equal(t, tc.liveIngesters, len(liveIngesters)) + assert.Equal(t, tc.expectedMaxFailure, maxFailure) + } else { + assert.EqualError(t, err, tc.expectedError) + } + }) + } +} diff --git a/vendor/github.com/grafana/dskit/ring/ring.go b/pkg/ring/ring.go similarity index 99% rename from vendor/github.com/grafana/dskit/ring/ring.go rename to pkg/ring/ring.go index 6aaf165bf97..a0784d954f2 100644 --- a/vendor/github.com/grafana/dskit/ring/ring.go +++ b/pkg/ring/ring.go @@ -13,17 +13,16 @@ import 
( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/grafana/dskit/flagext" + "github.com/grafana/dskit/kv" + "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/grafana/dskit/kv" - shardUtil "github.com/grafana/dskit/ring/shard" - "github.com/grafana/dskit/ring/util" - "github.com/grafana/dskit/services" - - "github.com/grafana/dskit/flagext" - dsmath "github.com/grafana/dskit/internal/math" + shardUtil "github.com/cortexproject/cortex/pkg/ring/shard" + "github.com/cortexproject/cortex/pkg/ring/util" + utilmath "github.com/cortexproject/cortex/pkg/util/math" ) const ( @@ -461,7 +460,7 @@ func (r *Ring) GetReplicationSetForOperation(op Operation) (ReplicationSet, erro // Given data is replicated to RF different zones, we can tolerate a number of // RF/2 failing zones. However, we need to protect from the case the ring currently // contains instances in a number of zones < RF. - numReplicatedZones := dsmath.Min(len(r.ringZones), r.cfg.ReplicationFactor) + numReplicatedZones := utilmath.Min(len(r.ringZones), r.cfg.ReplicationFactor) minSuccessZones := (numReplicatedZones / 2) + 1 maxUnavailableZones = minSuccessZones - 1 diff --git a/vendor/github.com/grafana/dskit/ring/ring.pb.go b/pkg/ring/ring.pb.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/ring.pb.go rename to pkg/ring/ring.pb.go diff --git a/vendor/github.com/grafana/dskit/ring/ring.proto b/pkg/ring/ring.proto similarity index 100% rename from vendor/github.com/grafana/dskit/ring/ring.proto rename to pkg/ring/ring.proto diff --git a/pkg/ring/ring_test.go b/pkg/ring/ring_test.go new file mode 100644 index 00000000000..684c712dc98 --- /dev/null +++ b/pkg/ring/ring_test.go @@ -0,0 +1,2363 @@ +package ring + +import ( + "bytes" + "context" + "fmt" + "math" + "math/rand" + "sort" + "strconv" + "strings" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/grafana/dskit/flagext" + "github.com/grafana/dskit/kv" + "github.com/grafana/dskit/kv/consul" + "github.com/grafana/dskit/services" + "github.com/grafana/dskit/test" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/cortexproject/cortex/pkg/ring/shard" + "github.com/cortexproject/cortex/pkg/ring/util" +) + +const ( + numTokens = 512 +) + +func BenchmarkBatch10x100(b *testing.B) { + benchmarkBatch(b, 10, 100) +} + +func BenchmarkBatch100x100(b *testing.B) { + benchmarkBatch(b, 100, 100) +} + +func BenchmarkBatch100x1000(b *testing.B) { + benchmarkBatch(b, 100, 1000) +} + +func benchmarkBatch(b *testing.B, numInstances, numKeys int) { + // Make a random ring with N instances, and M tokens per ingests + desc := NewDesc() + takenTokens := []uint32{} + for i := 0; i < numInstances; i++ { + tokens := GenerateTokens(numTokens, takenTokens) + takenTokens = append(takenTokens, tokens...) 
+ desc.AddIngester(fmt.Sprintf("%d", i), fmt.Sprintf("instance-%d", i), strconv.Itoa(i), tokens, ACTIVE, time.Now()) + } + + cfg := Config{} + flagext.DefaultValues(&cfg) + r := Ring{ + cfg: cfg, + ringDesc: desc, + strategy: NewDefaultReplicationStrategy(), + } + + ctx := context.Background() + callback := func(InstanceDesc, []int) error { + return nil + } + cleanup := func() { + } + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + keys := make([]uint32, numKeys) + // Generate a batch of N random keys, and look them up + b.ResetTimer() + for i := 0; i < b.N; i++ { + generateKeys(rnd, numKeys, keys) + err := DoBatch(ctx, Write, &r, keys, callback, cleanup) + require.NoError(b, err) + } +} + +func generateKeys(r *rand.Rand, numTokens int, dest []uint32) { + for i := 0; i < numTokens; i++ { + dest[i] = r.Uint32() + } +} + +func BenchmarkUpdateRingState(b *testing.B) { + for _, numInstances := range []int{50, 100, 500} { + for _, numZones := range []int{1, 3} { + for _, numTokens := range []int{128, 256, 512} { + for _, updateTokens := range []bool{false, true} { + b.Run(fmt.Sprintf("num instances = %d, num zones = %d, num tokens = %d, update tokens = %t", numInstances, numZones, numTokens, updateTokens), func(b *testing.B) { + benchmarkUpdateRingState(b, numInstances, numZones, numTokens, updateTokens) + }) + } + } + } + } +} + +func benchmarkUpdateRingState(b *testing.B, numInstances, numZones, numTokens int, updateTokens bool) { + cfg := Config{ + KVStore: kv.Config{}, + HeartbeatTimeout: 0, // get healthy stats + ReplicationFactor: 3, + ZoneAwarenessEnabled: true, + } + + // create the ring to set up metrics, but do not start + registry := prometheus.NewRegistry() + ring, err := NewWithStoreClientAndStrategy(cfg, testRingName, testRingKey, nil, NewDefaultReplicationStrategy(), registry, log.NewNopLogger()) + require.NoError(b, err) + + // Make a random ring with N instances, and M tokens per ingests + // Also make a copy with different timestamps and one with different tokens + desc := NewDesc() + otherDesc := NewDesc() + takenTokens := []uint32{} + otherTakenTokens := []uint32{} + for i := 0; i < numInstances; i++ { + tokens := GenerateTokens(numTokens, takenTokens) + takenTokens = append(takenTokens, tokens...) + now := time.Now() + id := fmt.Sprintf("%d", i) + desc.AddIngester(id, fmt.Sprintf("instance-%d", i), strconv.Itoa(i), tokens, ACTIVE, now) + if updateTokens { + otherTokens := GenerateTokens(numTokens, otherTakenTokens) + otherTakenTokens = append(otherTakenTokens, otherTokens...) 
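+ // A disjoint token set makes the copy compare as Different rather than EqualButStatesAndTimestamps.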
+ otherDesc.AddIngester(id, fmt.Sprintf("instance-%d", i), strconv.Itoa(i), otherTokens, ACTIVE, now) + } else { + otherDesc.AddIngester(id, fmt.Sprintf("instance-%d", i), strconv.Itoa(i), tokens, JOINING, now) + } + } + + if updateTokens { + require.Equal(b, Different, desc.RingCompare(otherDesc)) + } else { + require.Equal(b, EqualButStatesAndTimestamps, desc.RingCompare(otherDesc)) + } + + flipFlop := true + b.ResetTimer() + for n := 0; n < b.N; n++ { + if flipFlop { + ring.updateRingState(desc) + } else { + ring.updateRingState(otherDesc) + } + flipFlop = !flipFlop + } +} + +func TestDoBatchZeroInstances(t *testing.T) { + ctx := context.Background() + numKeys := 10 + keys := make([]uint32, numKeys) + rnd := rand.New(rand.NewSource(time.Now().UnixNano())) + generateKeys(rnd, numKeys, keys) + callback := func(InstanceDesc, []int) error { + return nil + } + cleanup := func() { + } + desc := NewDesc() + r := Ring{ + cfg: Config{}, + ringDesc: desc, + strategy: NewDefaultReplicationStrategy(), + } + require.Error(t, DoBatch(ctx, Write, &r, keys, callback, cleanup)) +} + +func TestAddIngester(t *testing.T) { + r := NewDesc() + + const ingName = "ing1" + + now := time.Now() + ing1Tokens := GenerateTokens(128, nil) + + r.AddIngester(ingName, "addr", "1", ing1Tokens, ACTIVE, now) + + assert.Equal(t, "addr", r.Ingesters[ingName].Addr) + assert.Equal(t, ing1Tokens, r.Ingesters[ingName].Tokens) + assert.InDelta(t, time.Now().Unix(), r.Ingesters[ingName].Timestamp, 2) + assert.Equal(t, now.Unix(), r.Ingesters[ingName].RegisteredTimestamp) +} + +func TestAddIngesterReplacesExistingTokens(t *testing.T) { + r := NewDesc() + + const ing1Name = "ing1" + + // old tokens will be replaced + r.Ingesters[ing1Name] = InstanceDesc{ + Tokens: []uint32{11111, 22222, 33333}, + } + + newTokens := GenerateTokens(128, nil) + + r.AddIngester(ing1Name, "addr", "1", newTokens, ACTIVE, time.Now()) + + require.Equal(t, newTokens, r.Ingesters[ing1Name].Tokens) +} + +func TestRing_Get_ZoneAwarenessWithIngesterLeaving(t *testing.T) { + const testCount = 10000 + + tests := map[string]struct { + replicationFactor int + expectedInstances int + expectedZones int + }{ + "should succeed if there are enough instances per zone on RF = 3": { + replicationFactor: 3, + expectedInstances: 3, + expectedZones: 3, + }, + "should succeed if there are enough instances per zone on RF = 2": { + replicationFactor: 2, + expectedInstances: 2, + expectedZones: 2, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + r := NewDesc() + instances := map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", State: ACTIVE}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", State: ACTIVE}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", State: ACTIVE}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", State: ACTIVE}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", State: LEAVING}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", State: ACTIVE}, + } + var prevTokens []uint32 + for id, instance := range instances { + ingTokens := GenerateTokens(128, prevTokens) + r.AddIngester(id, instance.Addr, instance.Zone, ingTokens, instance.State, time.Now()) + prevTokens = append(prevTokens, ingTokens...) 
+ } + instancesList := make([]InstanceDesc, 0, len(r.GetIngesters())) + for _, v := range r.GetIngesters() { + instancesList = append(instancesList, v) + } + + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ReplicationFactor: testData.replicationFactor, + ZoneAwarenessEnabled: true, + }, + ringDesc: r, + ringTokens: r.GetTokens(), + ringTokensByZone: r.getTokensByZone(), + ringInstanceByToken: r.getTokensInfo(), + ringZones: getZones(r.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + _, bufHosts, bufZones := MakeBuffersForGet() + + // Use the GenerateTokens to get an array of random uint32 values. + testValues := GenerateTokens(testCount, nil) + + for i := 0; i < testCount; i++ { + set, err := ring.Get(testValues[i], Write, instancesList, bufHosts, bufZones) + require.NoError(t, err) + + distinctZones := map[string]int{} + for _, instance := range set.Instances { + distinctZones[instance.Zone]++ + } + + assert.Len(t, set.Instances, testData.expectedInstances) + assert.Len(t, distinctZones, testData.expectedZones) + } + }) + } +} + +func TestRing_Get_ZoneAwareness(t *testing.T) { + // Number of tests to run. + const testCount = 10000 + + tests := map[string]struct { + numInstances int + numZones int + replicationFactor int + zoneAwarenessEnabled bool + expectedErr string + expectedInstances int + }{ + "should succeed if there are enough instances per zone on RF = 3": { + numInstances: 16, + numZones: 3, + replicationFactor: 3, + zoneAwarenessEnabled: true, + expectedInstances: 3, + }, + "should fail if there are instances in 1 zone only on RF = 3": { + numInstances: 16, + numZones: 1, + replicationFactor: 3, + zoneAwarenessEnabled: true, + expectedErr: "at least 2 live replicas required across different availability zones, could only find 1", + }, + "should succeed if there are instances in 2 zones on RF = 3": { + numInstances: 16, + numZones: 2, + replicationFactor: 3, + zoneAwarenessEnabled: true, + expectedInstances: 2, + }, + "should succeed if there are instances in 1 zone only on RF = 3 but zone-awareness is disabled": { + numInstances: 16, + numZones: 1, + replicationFactor: 3, + zoneAwarenessEnabled: false, + expectedInstances: 3, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + // Add instances to the ring. + r := NewDesc() + var prevTokens []uint32 + for i := 0; i < testData.numInstances; i++ { + name := fmt.Sprintf("ing%v", i) + ingTokens := GenerateTokens(128, prevTokens) + + r.AddIngester(name, fmt.Sprintf("127.0.0.%d", i), fmt.Sprintf("zone-%v", i%testData.numZones), ingTokens, ACTIVE, time.Now()) + + prevTokens = append(prevTokens, ingTokens...) + } + + // Create a ring with the instances + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ReplicationFactor: testData.replicationFactor, + ZoneAwarenessEnabled: testData.zoneAwarenessEnabled, + }, + ringDesc: r, + ringTokens: r.GetTokens(), + ringTokensByZone: r.getTokensByZone(), + ringInstanceByToken: r.getTokensInfo(), + ringZones: getZones(r.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + instances := make([]InstanceDesc, 0, len(r.GetIngesters())) + for _, v := range r.GetIngesters() { + instances = append(instances, v) + } + + _, bufHosts, bufZones := MakeBuffersForGet() + + // Use the GenerateTokens to get an array of random uint32 values. 
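+ // Each value is then used as a lookup key, so the test exercises many different positions on the ring.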
+ testValues := GenerateTokens(testCount, nil) + + var set ReplicationSet + var err error + for i := 0; i < testCount; i++ { + set, err = ring.Get(testValues[i], Write, instances, bufHosts, bufZones) + if testData.expectedErr != "" { + require.EqualError(t, err, testData.expectedErr) + } else { + require.NoError(t, err) + } + + // Skip the rest of the assertions if we were expecting an error. + if testData.expectedErr != "" { + continue + } + + // Check that we have the expected number of instances for replication. + assert.Equal(t, testData.expectedInstances, len(set.Instances)) + + // Ensure all instances are in a different zone (only if zone-awareness is enabled). + if testData.zoneAwarenessEnabled { + zones := make(map[string]struct{}) + for i := 0; i < len(set.Instances); i++ { + if _, ok := zones[set.Instances[i].Zone]; ok { + t.Fatal("found multiple instances in the same zone") + } + zones[set.Instances[i].Zone] = struct{}{} + } + } + } + }) + } +} + +func TestRing_GetAllHealthy(t *testing.T) { + const heartbeatTimeout = time.Minute + now := time.Now() + + tests := map[string]struct { + ringInstances map[string]InstanceDesc + expectedErrForRead error + expectedSetForRead []string + expectedErrForWrite error + expectedSetForWrite []string + expectedErrForReporting error + expectedSetForReporting []string + }{ + "should return error on empty ring": { + ringInstances: nil, + expectedErrForRead: ErrEmptyRing, + expectedErrForWrite: ErrEmptyRing, + expectedErrForReporting: ErrEmptyRing, + }, + "should return all healthy instances for the given operation": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", State: ACTIVE, Timestamp: now.Unix()}, + "instance-2": {Addr: "127.0.0.2", State: PENDING, Timestamp: now.Add(-10 * time.Second).Unix()}, + "instance-3": {Addr: "127.0.0.3", State: JOINING, Timestamp: now.Add(-20 * time.Second).Unix()}, + "instance-4": {Addr: "127.0.0.4", State: LEAVING, Timestamp: now.Add(-30 * time.Second).Unix()}, + "instance-5": {Addr: "127.0.0.5", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix()}, + }, + expectedSetForRead: []string{"127.0.0.1", "127.0.0.2", "127.0.0.4"}, + expectedSetForWrite: []string{"127.0.0.1"}, + expectedSetForReporting: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4"}, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + // Init the ring. 
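+ // An instance counts as healthy for an operation when its state is allowed for that operation and its last heartbeat is within the timeout.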
+ ringDesc := &Desc{Ingesters: testData.ringInstances} + for id, instance := range ringDesc.Ingesters { + ringDesc.Ingesters[id] = instance + } + + ring := Ring{ + cfg: Config{HeartbeatTimeout: heartbeatTimeout}, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + set, err := ring.GetAllHealthy(Read) + require.Equal(t, testData.expectedErrForRead, err) + assert.ElementsMatch(t, testData.expectedSetForRead, set.GetAddresses()) + + set, err = ring.GetAllHealthy(Write) + require.Equal(t, testData.expectedErrForWrite, err) + assert.ElementsMatch(t, testData.expectedSetForWrite, set.GetAddresses()) + + set, err = ring.GetAllHealthy(Reporting) + require.Equal(t, testData.expectedErrForReporting, err) + assert.ElementsMatch(t, testData.expectedSetForReporting, set.GetAddresses()) + }) + } +} + +func TestRing_GetReplicationSetForOperation(t *testing.T) { + now := time.Now() + + tests := map[string]struct { + ringInstances map[string]InstanceDesc + ringHeartbeatTimeout time.Duration + ringReplicationFactor int + expectedErrForRead error + expectedSetForRead []string + expectedErrForWrite error + expectedSetForWrite []string + expectedErrForReporting error + expectedSetForReporting []string + }{ + "should return error on empty ring": { + ringInstances: nil, + ringHeartbeatTimeout: time.Minute, + ringReplicationFactor: 1, + expectedErrForRead: ErrEmptyRing, + expectedErrForWrite: ErrEmptyRing, + expectedErrForReporting: ErrEmptyRing, + }, + "should succeed on all healthy instances and RF=1": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", State: ACTIVE, Timestamp: now.Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", State: ACTIVE, Timestamp: now.Add(-10 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", State: ACTIVE, Timestamp: now.Add(-20 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", State: ACTIVE, Timestamp: now.Add(-30 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", State: ACTIVE, Timestamp: now.Add(-40 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + }, + ringHeartbeatTimeout: time.Minute, + ringReplicationFactor: 1, + expectedSetForRead: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5"}, + expectedSetForWrite: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5"}, + expectedSetForReporting: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5"}, + }, + "should succeed on instances with old timestamps but heartbeat timeout disabled": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix(), Tokens: GenerateTokens(128, nil)}, + }, 
+ ringHeartbeatTimeout: 0, + ringReplicationFactor: 1, + expectedSetForRead: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5"}, + expectedSetForWrite: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5"}, + expectedSetForReporting: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5"}, + }, + "should fail on 1 unhealthy instance and RF=1": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", State: ACTIVE, Timestamp: now.Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", State: ACTIVE, Timestamp: now.Add(-10 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", State: ACTIVE, Timestamp: now.Add(-20 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", State: ACTIVE, Timestamp: now.Add(-30 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix(), Tokens: GenerateTokens(128, nil)}, + }, + ringHeartbeatTimeout: time.Minute, + ringReplicationFactor: 1, + expectedErrForRead: ErrTooManyUnhealthyInstances, + expectedErrForWrite: ErrTooManyUnhealthyInstances, + expectedErrForReporting: ErrTooManyUnhealthyInstances, + }, + "should succeed on 1 unhealthy instances and RF=3": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", State: ACTIVE, Timestamp: now.Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", State: ACTIVE, Timestamp: now.Add(-10 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", State: ACTIVE, Timestamp: now.Add(-20 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", State: ACTIVE, Timestamp: now.Add(-30 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix(), Tokens: GenerateTokens(128, nil)}, + }, + ringHeartbeatTimeout: time.Minute, + ringReplicationFactor: 3, + expectedSetForRead: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4"}, + expectedSetForWrite: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4"}, + expectedSetForReporting: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4"}, + }, + "should fail on 2 unhealthy instances and RF=3": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", State: ACTIVE, Timestamp: now.Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", State: ACTIVE, Timestamp: now.Add(-10 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", State: ACTIVE, Timestamp: now.Add(-20 * time.Second).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix(), Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", State: ACTIVE, Timestamp: now.Add(-2 * time.Minute).Unix(), Tokens: GenerateTokens(128, nil)}, + }, + ringHeartbeatTimeout: time.Minute, + ringReplicationFactor: 3, + expectedErrForRead: ErrTooManyUnhealthyInstances, + expectedErrForWrite: ErrTooManyUnhealthyInstances, + expectedErrForReporting: ErrTooManyUnhealthyInstances, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + // Init the ring. 
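+ // In the RF=3 cases above, one unhealthy instance out of five is tolerated; two unhealthy instances make read, write and reporting all fail with ErrTooManyUnhealthyInstances.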
+ ringDesc := &Desc{Ingesters: testData.ringInstances} + for id, instance := range ringDesc.Ingesters { + ringDesc.Ingesters[id] = instance + } + + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: testData.ringHeartbeatTimeout, + ReplicationFactor: testData.ringReplicationFactor, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + set, err := ring.GetReplicationSetForOperation(Read) + require.Equal(t, testData.expectedErrForRead, err) + assert.ElementsMatch(t, testData.expectedSetForRead, set.GetAddresses()) + + set, err = ring.GetReplicationSetForOperation(Write) + require.Equal(t, testData.expectedErrForWrite, err) + assert.ElementsMatch(t, testData.expectedSetForWrite, set.GetAddresses()) + + set, err = ring.GetReplicationSetForOperation(Reporting) + require.Equal(t, testData.expectedErrForReporting, err) + assert.ElementsMatch(t, testData.expectedSetForReporting, set.GetAddresses()) + }) + } +} + +func TestRing_GetReplicationSetForOperation_WithZoneAwarenessEnabled(t *testing.T) { + tests := map[string]struct { + ringInstances map[string]InstanceDesc + unhealthyInstances []string + expectedAddresses []string + replicationFactor int + expectedError error + expectedMaxErrors int + expectedMaxUnavailableZones int + }{ + "empty ring": { + ringInstances: nil, + expectedError: ErrEmptyRing, + }, + "RF=1, 1 zone": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, + replicationFactor: 1, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "RF=1, 1 zone, one unhealthy instance": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-2"}, + replicationFactor: 1, + expectedError: ErrTooManyUnhealthyInstances, + }, + "RF=1, 3 zones, one unhealthy instance": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-3"}, + replicationFactor: 1, + expectedError: ErrTooManyUnhealthyInstances, + }, + "RF=2, 2 zones": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, + replicationFactor: 2, + expectedMaxUnavailableZones: 1, + }, + "RF=2, 2 zones, one unhealthy instance": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1"}, + unhealthyInstances: 
[]string{"instance-2"}, + replicationFactor: 2, + }, + "RF=3, 3 zones, one instance per zone": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 1, + }, + "RF=3, 3 zones, one instance per zone, one instance unhealthy": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.2", "127.0.0.3"}, + unhealthyInstances: []string{"instance-1"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "RF=3, 3 zones, one instance per zone, two instances unhealthy in separate zones": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-1", "instance-2"}, + replicationFactor: 3, + expectedError: ErrTooManyUnhealthyInstances, + }, + "RF=3, 3 zones, one instance per zone, all instances unhealthy": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-1", "instance-2", "instance-3"}, + replicationFactor: 3, + expectedError: ErrTooManyUnhealthyInstances, + }, + "RF=3, 3 zones, two instances per zone": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5", "127.0.0.6"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 1, + }, + "RF=3, 3 zones, two instances per zone, two instances unhealthy in same zone": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, 
nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.5", "127.0.0.6"}, + unhealthyInstances: []string{"instance-3", "instance-4"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "RF=3, 3 zones, three instances per zone, two instances unhealthy in same zone": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-7": {Addr: "127.0.0.7", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-8": {Addr: "127.0.0.8", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-9": {Addr: "127.0.0.9", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.7", "127.0.0.8", "127.0.0.9"}, + unhealthyInstances: []string{"instance-4", "instance-6"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "RF=3, only 2 zones, two instances per zone": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 1, + }, + "RF=3, only 2 zones, two instances per zone, one instance unhealthy": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, + unhealthyInstances: []string{"instance-4"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "RF=3, only 1 zone, two instances per zone": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2"}, + replicationFactor: 3, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "RF=3, only 1 zone, two instances per zone, one instance unhealthy": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-2"}, + replicationFactor: 3, + 
expectedError: ErrTooManyUnhealthyInstances, + }, + "RF=5, 5 zones, two instances per zone except for one zone which has three": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-7": {Addr: "127.0.0.7", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-8": {Addr: "127.0.0.8", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-9": {Addr: "127.0.0.9", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-10": {Addr: "127.0.0.10", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-11": {Addr: "127.0.0.11", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.3", "127.0.0.4", "127.0.0.5", + "127.0.0.6", "127.0.0.7", "127.0.0.8", "127.0.0.9", "127.0.0.10", "127.0.0.11"}, + replicationFactor: 5, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 2, + }, + "RF=5, 5 zones, two instances per zone except for one zone which has three, 2 unhealthy nodes in same zones": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-7": {Addr: "127.0.0.7", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-8": {Addr: "127.0.0.8", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-9": {Addr: "127.0.0.9", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-10": {Addr: "127.0.0.10", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-11": {Addr: "127.0.0.11", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.5", "127.0.0.6", "127.0.0.7", "127.0.0.8", "127.0.0.9", "127.0.0.10", "127.0.0.11"}, + unhealthyInstances: []string{"instance-3", "instance-4"}, + replicationFactor: 5, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 1, + }, + "RF=5, 5 zones, two instances per zone except for one zone which has three, 2 unhealthy nodes in separate zones": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-7": {Addr: "127.0.0.7", Zone: 
"zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-8": {Addr: "127.0.0.8", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-9": {Addr: "127.0.0.9", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-10": {Addr: "127.0.0.10", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + "instance-11": {Addr: "127.0.0.11", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + }, + expectedAddresses: []string{"127.0.0.1", "127.0.0.2", "127.0.0.7", "127.0.0.8", "127.0.0.9", "127.0.0.10", "127.0.0.11"}, + unhealthyInstances: []string{"instance-3", "instance-5"}, + replicationFactor: 5, + expectedMaxErrors: 0, + expectedMaxUnavailableZones: 0, + }, + "RF=5, 5 zones, one instances per zone, three unhealthy instances": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-d", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-e", Tokens: GenerateTokens(128, nil)}, + }, + unhealthyInstances: []string{"instance-2", "instance-4", "instance-5"}, + replicationFactor: 5, + expectedError: ErrTooManyUnhealthyInstances, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + // Ensure the test case has been correctly setup (max errors and max unavailable zones are + // mutually exclusive). + require.False(t, testData.expectedMaxErrors > 0 && testData.expectedMaxUnavailableZones > 0) + + // Init the ring. + ringDesc := &Desc{Ingesters: testData.ringInstances} + for id, instance := range ringDesc.Ingesters { + instance.Timestamp = time.Now().Unix() + instance.State = ACTIVE + for _, instanceName := range testData.unhealthyInstances { + if instanceName == id { + instance.Timestamp = time.Now().Add(-time.Hour).Unix() + } + } + ringDesc.Ingesters[id] = instance + } + + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Minute, + ZoneAwarenessEnabled: true, + ReplicationFactor: testData.replicationFactor, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + // Check the replication set has the correct settings + replicationSet, err := ring.GetReplicationSetForOperation(Read) + if testData.expectedError == nil { + require.NoError(t, err) + } else { + require.Equal(t, testData.expectedError, err) + } + + assert.Equal(t, testData.expectedMaxErrors, replicationSet.MaxErrors) + assert.Equal(t, testData.expectedMaxUnavailableZones, replicationSet.MaxUnavailableZones) + + returnAddresses := []string{} + for _, instance := range replicationSet.Instances { + returnAddresses = append(returnAddresses, instance.Addr) + } + for _, addr := range testData.expectedAddresses { + assert.Contains(t, returnAddresses, addr) + } + assert.Equal(t, len(testData.expectedAddresses), len(replicationSet.Instances)) + }) + } +} + +func TestRing_ShuffleShard(t *testing.T) { + tests := map[string]struct { + ringInstances map[string]InstanceDesc + shardSize int + zoneAwarenessEnabled bool + expectedSize int + expectedDistribution []int + }{ + "empty ring": { + ringInstances: nil, + shardSize: 2, + zoneAwarenessEnabled: true, + expectedSize: 0, + 
expectedDistribution: []int{}, + }, + "single zone, shard size > num instances": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + shardSize: 3, + zoneAwarenessEnabled: true, + expectedSize: 2, + expectedDistribution: []int{2}, + }, + "single zone, shard size < num instances": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + }, + shardSize: 2, + zoneAwarenessEnabled: true, + expectedSize: 2, + expectedDistribution: []int{2}, + }, + "multiple zones, shard size < num zones": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + shardSize: 2, + zoneAwarenessEnabled: true, + expectedSize: 3, + expectedDistribution: []int{1, 1, 1}, + }, + "multiple zones, shard size divisible by num zones": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + shardSize: 3, + zoneAwarenessEnabled: true, + expectedSize: 3, + expectedDistribution: []int{1, 1, 1}, + }, + "multiple zones, shard size NOT divisible by num zones": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + shardSize: 4, + zoneAwarenessEnabled: true, + expectedSize: 6, + expectedDistribution: []int{2, 2, 2}, + }, + "multiple zones, shard size NOT divisible by num zones, but zone awareness is disabled": { + ringInstances: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-2": {Addr: "127.0.0.2", Zone: "zone-a", Tokens: GenerateTokens(128, nil)}, + "instance-3": {Addr: "127.0.0.3", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-4": {Addr: "127.0.0.4", Zone: "zone-b", Tokens: GenerateTokens(128, nil)}, + "instance-5": {Addr: "127.0.0.5", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + "instance-6": {Addr: "127.0.0.6", Zone: "zone-c", Tokens: GenerateTokens(128, nil)}, + }, + shardSize: 4, + zoneAwarenessEnabled: false, + expectedSize: 4, + }, + } + + for 
testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + // Init the ring. + ringDesc := &Desc{Ingesters: testData.ringInstances} + for id, instance := range ringDesc.Ingesters { + instance.Timestamp = time.Now().Unix() + instance.State = ACTIVE + ringDesc.Ingesters[id] = instance + } + + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ZoneAwarenessEnabled: testData.zoneAwarenessEnabled, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + shardRing := ring.ShuffleShard("tenant-id", testData.shardSize) + assert.Equal(t, testData.expectedSize, shardRing.InstancesCount()) + + // Compute the actual distribution of instances across zones. + if testData.zoneAwarenessEnabled { + var actualDistribution []int + + if shardRing.InstancesCount() > 0 { + all, err := shardRing.GetAllHealthy(Read) + require.NoError(t, err) + + countByZone := map[string]int{} + for _, instance := range all.Instances { + countByZone[instance.Zone]++ + } + + for _, count := range countByZone { + actualDistribution = append(actualDistribution, count) + } + } + + assert.ElementsMatch(t, testData.expectedDistribution, actualDistribution) + } + }) + } +} + +// This test asserts on shard stability across multiple invocations and given the same input ring. +func TestRing_ShuffleShard_Stability(t *testing.T) { + var ( + numTenants = 100 + numInstances = 50 + numZones = 3 + numInvocations = 10 + shardSizes = []int{3, 6, 9, 12, 15} + ) + + // Initialise the ring. + ringDesc := &Desc{Ingesters: generateRingInstances(numInstances, numZones, 128)} + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ZoneAwarenessEnabled: true, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + for i := 1; i <= numTenants; i++ { + tenantID := fmt.Sprintf("%d", i) + + for _, size := range shardSizes { + r := ring.ShuffleShard(tenantID, size) + expected, err := r.GetAllHealthy(Read) + require.NoError(t, err) + + // Assert that multiple invocations generate the same exact shard. + for n := 0; n < numInvocations; n++ { + r := ring.ShuffleShard(tenantID, size) + actual, err := r.GetAllHealthy(Read) + require.NoError(t, err) + assert.ElementsMatch(t, expected.Instances, actual.Instances) + } + } + } +} + +func TestRing_ShuffleShard_Shuffling(t *testing.T) { + var ( + numTenants = 1000 + numInstances = 90 + numZones = 3 + shardSize = 3 + + // This is the expected theoretical distribution of matching instances + // between different shards, given the settings above. It has been computed + // using this spreadsheet: + // https://docs.google.com/spreadsheets/d/1FXbiWTXi6bdERtamH-IfmpgFq1fNL4GP_KX_yJvbRi4/edit + theoreticalMatchings = map[int]float64{ + 0: 90.2239, + 1: 9.55312, + 2: 0.22217, + 3: 0.00085, + } + ) + + // Initialise the ring instances. To have stable tests we generate tokens using a linear + // distribution. Tokens within the same zone are evenly distributed too. 
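+	// For example, with 90 instances and 128 tokens each: step = math.MaxUint32/128 = 33554431, instance i starts at offset (step/90)*i = 372827*i, and its remaining tokens follow every step, so the layout is fully deterministic.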
+ instances := make(map[string]InstanceDesc, numInstances) + for i := 0; i < numInstances; i++ { + id := fmt.Sprintf("instance-%d", i) + instances[id] = InstanceDesc{ + Addr: fmt.Sprintf("127.0.0.%d", i), + Timestamp: time.Now().Unix(), + RegisteredTimestamp: time.Now().Unix(), + State: ACTIVE, + Tokens: generateTokensLinear(i, numInstances, 128), + Zone: fmt.Sprintf("zone-%d", i%numZones), + } + } + + // Initialise the ring. + ringDesc := &Desc{Ingesters: instances} + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ZoneAwarenessEnabled: true, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + // Compute the shard for each tenant. + shards := map[string][]string{} + + for i := 1; i <= numTenants; i++ { + tenantID := fmt.Sprintf("%d", i) + r := ring.ShuffleShard(tenantID, shardSize) + set, err := r.GetAllHealthy(Read) + require.NoError(t, err) + + instances := make([]string, 0, len(set.Instances)) + for _, instance := range set.Instances { + instances = append(instances, instance.Addr) + } + + shards[tenantID] = instances + } + + // Compute the distribution of matching instances between every combination of shards. + // The shards comparison is not optimized, but it's fine for a test. + distribution := map[int]int{} + + for currID, currShard := range shards { + for otherID, otherShard := range shards { + if currID == otherID { + continue + } + + numMatching := 0 + for _, c := range currShard { + if util.StringsContain(otherShard, c) { + numMatching++ + } + } + + distribution[numMatching]++ + } + } + + maxCombinations := int(math.Pow(float64(numTenants), 2)) - numTenants + for numMatching, probability := range theoreticalMatchings { + // We allow a max deviance of 10% compared to the theoretical probability, + // clamping it between 1% and 0.2% boundaries. + maxDeviance := math.Min(1, math.Max(0.2, probability*0.1)) + + actual := (float64(distribution[numMatching]) / float64(maxCombinations)) * 100 + assert.InDelta(t, probability, actual, maxDeviance, "numMatching: %d", numMatching) + } +} + +func TestRing_ShuffleShard_Consistency(t *testing.T) { + type change string + + type scenario struct { + name string + numInstances int + numZones int + shardSize int + ringChange change + } + + const ( + numTenants = 100 + add = change("add-instance") + remove = change("remove-instance") + ) + + // Generate all test scenarios. + var scenarios []scenario + for _, numInstances := range []int{20, 30, 40, 50} { + for _, shardSize := range []int{3, 6, 9, 12, 15} { + for _, c := range []change{add, remove} { + scenarios = append(scenarios, scenario{ + name: fmt.Sprintf("instances = %d, shard size = %d, ring operation = %s", numInstances, shardSize, c), + numInstances: numInstances, + numZones: 3, + shardSize: shardSize, + ringChange: c, + }) + } + } + } + + for _, s := range scenarios { + t.Run(s.name, func(t *testing.T) { + // Initialise the ring. 
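+			// Each scenario starts from a fresh ring, so the single add or remove applied below is the only difference between the initial and updated shards being compared.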
+ ringDesc := &Desc{Ingesters: generateRingInstances(s.numInstances, s.numZones, 128)} + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ZoneAwarenessEnabled: true, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + // Compute the initial shard for each tenant. + initial := map[int]ReplicationSet{} + for id := 0; id < numTenants; id++ { + set, err := ring.ShuffleShard(fmt.Sprintf("%d", id), s.shardSize).GetAllHealthy(Read) + require.NoError(t, err) + initial[id] = set + } + + // Update the ring. + switch s.ringChange { + case add: + newID, newDesc := generateRingInstance(s.numInstances+1, 0, 128) + ringDesc.Ingesters[newID] = newDesc + case remove: + // Remove the first one. + for id := range ringDesc.Ingesters { + delete(ringDesc.Ingesters, id) + break + } + } + + ring.ringTokens = ringDesc.GetTokens() + ring.ringTokensByZone = ringDesc.getTokensByZone() + ring.ringInstanceByToken = ringDesc.getTokensInfo() + ring.ringZones = getZones(ringDesc.getTokensByZone()) + + // Compute the updated shard for each tenant and compare it with the initial one. + // If the "consistency" property is guaranteed, we expect no more than 1 different instance + // in the updated shard. + for id := 0; id < numTenants; id++ { + updated, err := ring.ShuffleShard(fmt.Sprintf("%d", id), s.shardSize).GetAllHealthy(Read) + require.NoError(t, err) + + added, removed := compareReplicationSets(initial[id], updated) + assert.LessOrEqual(t, len(added), 1) + assert.LessOrEqual(t, len(removed), 1) + } + }) + } +} + +func TestRing_ShuffleShard_ConsistencyOnShardSizeChanged(t *testing.T) { + // Create 30 instances in 3 zones. + ringInstances := map[string]InstanceDesc{} + for i := 0; i < 30; i++ { + name, desc := generateRingInstance(i, i%3, 128) + ringInstances[name] = desc + } + + // Init the ring. + ringDesc := &Desc{Ingesters: ringInstances} + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ZoneAwarenessEnabled: true, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + // Get the replication set with shard size = 3. + firstShard := ring.ShuffleShard("tenant-id", 3) + assert.Equal(t, 3, firstShard.InstancesCount()) + + firstSet, err := firstShard.GetAllHealthy(Read) + require.NoError(t, err) + + // Increase shard size to 6. + secondShard := ring.ShuffleShard("tenant-id", 6) + assert.Equal(t, 6, secondShard.InstancesCount()) + + secondSet, err := secondShard.GetAllHealthy(Read) + require.NoError(t, err) + + for _, firstInstance := range firstSet.Instances { + assert.True(t, secondSet.Includes(firstInstance.Addr), "new replication set is expected to include previous instance %s", firstInstance.Addr) + } + + // Increase shard size to 9. + thirdShard := ring.ShuffleShard("tenant-id", 9) + assert.Equal(t, 9, thirdShard.InstancesCount()) + + thirdSet, err := thirdShard.GetAllHealthy(Read) + require.NoError(t, err) + + for _, secondInstance := range secondSet.Instances { + assert.True(t, thirdSet.Includes(secondInstance.Addr), "new replication set is expected to include previous instance %s", secondInstance.Addr) + } + + // Decrease shard size to 6.
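+	// Shuffle sharding selects instances deterministically for a given tenant and ring state, so shrinking back to a shard size used earlier must return exactly the instances that were selected at that size.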
+ fourthShard := ring.ShuffleShard("tenant-id", 6) + assert.Equal(t, 6, fourthShard.InstancesCount()) + + fourthSet, err := fourthShard.GetAllHealthy(Read) + require.NoError(t, err) + + // We expect to have the same exact instances we had when the shard size was 6. + for _, secondInstance := range secondSet.Instances { + assert.True(t, fourthSet.Includes(secondInstance.Addr), "new replication set is expected to include previous instance %s", secondInstance.Addr) + } + + // Decrease shard size to 3. + fifthShard := ring.ShuffleShard("tenant-id", 3) + assert.Equal(t, 3, fifthShard.InstancesCount()) + + fifthSet, err := fifthShard.GetAllHealthy(Read) + require.NoError(t, err) + + // We expect to have the same exact instances we had when the shard size was 3. + for _, firstInstance := range firstSet.Instances { + assert.True(t, fifthSet.Includes(firstInstance.Addr), "new replication set is expected to include previous instance %s", firstInstance.Addr) + } +} + +func TestRing_ShuffleShard_ConsistencyOnZonesChanged(t *testing.T) { + // Create 20 instances in 2 zones. + ringInstances := map[string]InstanceDesc{} + for i := 0; i < 20; i++ { + name, desc := generateRingInstance(i, i%2, 128) + ringInstances[name] = desc + } + + // Init the ring. + ringDesc := &Desc{Ingesters: ringInstances} + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ZoneAwarenessEnabled: true, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + // Get the replication set with shard size = 2. + firstShard := ring.ShuffleShard("tenant-id", 2) + assert.Equal(t, 2, firstShard.InstancesCount()) + + firstSet, err := firstShard.GetAllHealthy(Read) + require.NoError(t, err) + + // Increase shard size to 4. + secondShard := ring.ShuffleShard("tenant-id", 4) + assert.Equal(t, 4, secondShard.InstancesCount()) + + secondSet, err := secondShard.GetAllHealthy(Read) + require.NoError(t, err) + + for _, firstInstance := range firstSet.Instances { + assert.True(t, secondSet.Includes(firstInstance.Addr), "new replication set is expected to include previous instance %s", firstInstance.Addr) + } + + // Scale up cluster, adding 10 instances in 1 new zone. + for i := 20; i < 30; i++ { + name, desc := generateRingInstance(i, 2, 128) + ringInstances[name] = desc + } + + ring.ringDesc.Ingesters = ringInstances + ring.ringTokens = ringDesc.GetTokens() + ring.ringTokensByZone = ringDesc.getTokensByZone() + ring.ringInstanceByToken = ringDesc.getTokensInfo() + ring.ringZones = getZones(ringDesc.getTokensByZone()) + + // Increase shard size to 6. + thirdShard := ring.ShuffleShard("tenant-id", 6) + assert.Equal(t, 6, thirdShard.InstancesCount()) + + thirdSet, err := thirdShard.GetAllHealthy(Read) + require.NoError(t, err) + + for _, secondInstance := range secondSet.Instances { + assert.True(t, thirdSet.Includes(secondInstance.Addr), "new replication set is expected to include previous instance %s", secondInstance.Addr) + } + + // Increase shard size to 9. 
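+	// Growing the shard within the same ring state may only add instances; none of the instances picked at the smaller size should be dropped.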
+ fourthShard := ring.ShuffleShard("tenant-id", 9) + assert.Equal(t, 9, fourthShard.InstancesCount()) + + fourthSet, err := fourthShard.GetAllHealthy(Read) + require.NoError(t, err) + + for _, thirdInstance := range thirdSet.Instances { + assert.True(t, fourthSet.Includes(thirdInstance.Addr), "new replication set is expected to include previous instance %s", thirdInstance.Addr) + } +} + +func TestRing_ShuffleShardWithLookback(t *testing.T) { + type eventType int + + const ( + add eventType = iota + remove + test + + lookbackPeriod = time.Hour + userID = "user-1" + ) + + var ( + now = time.Now() + ) + + type event struct { + what eventType + instanceID string + instanceDesc InstanceDesc + shardSize int + expected []string + } + + tests := map[string]struct { + timeline []event + }{ + "single zone, shard size = 1, recently bootstrapped cluster": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-time.Minute))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-time.Minute))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now.Add(-time.Minute))}, + {what: test, shardSize: 1, expected: []string{"instance-1", "instance-2", "instance-3"}}, + }, + }, + "single zone, shard size = 1, instances scale up": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 3}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now.Add(-2*lookbackPeriod))}, + {what: test, shardSize: 1, expected: []string{"instance-1"}}, + {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 2}, now.Add(-10*time.Minute))}, + {what: test, shardSize: 1, expected: []string{"instance-4" /* lookback: */, "instance-1"}}, + {what: add, instanceID: "instance-5", instanceDesc: generateRingInstanceWithInfo("instance-5", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-5*time.Minute))}, + {what: test, shardSize: 1, expected: []string{"instance-5" /* lookback: */, "instance-4", "instance-1"}}, + }, + }, + "single zone, shard size = 1, instances scale down": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now.Add(-2*lookbackPeriod))}, + {what: test, shardSize: 1, expected: []string{"instance-1"}}, + {what: remove, instanceID: "instance-3"}, + {what: test, shardSize: 1, expected: 
[]string{"instance-1"}}, + {what: remove, instanceID: "instance-1"}, + {what: test, shardSize: 1, expected: []string{"instance-2"}}, + }, + }, + "single zone, shard size = 1, rollout with instances unregistered (removed and re-added one by one)": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now.Add(-2*lookbackPeriod))}, + {what: test, shardSize: 1, expected: []string{"instance-1"}}, + // Rollout instance-3. + {what: remove, instanceID: "instance-3"}, + {what: test, shardSize: 1, expected: []string{"instance-1"}}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now)}, + {what: test, shardSize: 1, expected: []string{"instance-1"}}, + // Rollout instance-2. + {what: remove, instanceID: "instance-2"}, + {what: test, shardSize: 1, expected: []string{"instance-1"}}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now)}, + {what: test, shardSize: 1, expected: []string{"instance-1"}}, + // Rollout instance-1. + {what: remove, instanceID: "instance-1"}, + {what: test, shardSize: 1, expected: []string{"instance-2" /* side effect: */, "instance-3"}}, + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now)}, + {what: test, shardSize: 1, expected: []string{"instance-1" /* lookback: */, "instance-2" /* side effect: */, "instance-3"}}, + }, + }, + "single zone, shard size = 2, rollout with instances unregistered (removed and re-added one by one)": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-a", []uint32{userToken(userID, "zone-a", 3) + 1}, now.Add(-2*lookbackPeriod))}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2"}}, + // Rollout instance-4. + {what: remove, instanceID: "instance-4"}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2"}}, + {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-a", []uint32{userToken(userID, "zone-a", 3) + 1}, now)}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2"}}, + // Rollout instance-3. 
+ {what: remove, instanceID: "instance-3"}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2"}}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now)}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2"}}, + // Rollout instance-2. + {what: remove, instanceID: "instance-2"}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-3" /* side effect:*/, "instance-4"}}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now)}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2" /* lookback: */, "instance-3" /* side effect:*/, "instance-4"}}, + // Rollout instance-1. + {what: remove, instanceID: "instance-1"}, + {what: test, shardSize: 2, expected: []string{"instance-2" /* lookback: */, "instance-3" /* side effect:*/, "instance-4"}}, + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now)}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2" /* lookback: */, "instance-3" /* side effect:*/, "instance-4"}}, + }, + }, + "single zone, increase shard size": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now.Add(-2*lookbackPeriod))}, + {what: test, shardSize: 1, expected: []string{"instance-1"}}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2"}}, + {what: test, shardSize: 3, expected: []string{"instance-1", "instance-2", "instance-3"}}, + {what: test, shardSize: 4, expected: []string{"instance-1", "instance-2", "instance-3"}}, + }, + }, + "multi zone, shard size = 3, recently bootstrapped cluster": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-time.Minute))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 1}, now.Add(-time.Minute))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 1}, now.Add(-time.Minute))}, + {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-a", []uint32{userToken(userID, "zone-a", 3) + 1}, now.Add(-time.Minute))}, + {what: add, instanceID: "instance-5", instanceDesc: generateRingInstanceWithInfo("instance-5", "zone-b", []uint32{userToken(userID, "zone-b", 4) + 1}, now.Add(-time.Minute))}, + {what: add, instanceID: "instance-6", instanceDesc: generateRingInstanceWithInfo("instance-6", "zone-c", []uint32{userToken(userID, "zone-c", 5) + 1}, now.Add(-time.Minute))}, + {what: test, shardSize: 3, expected: []string{"instance-1", "instance-2", "instance-3", "instance-4", 
"instance-5", "instance-6"}}, + }, + }, + "multi zone, shard size = 3, instances scale up": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 2}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 2}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 2}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 3}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-5", instanceDesc: generateRingInstanceWithInfo("instance-5", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 3}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-6", instanceDesc: generateRingInstanceWithInfo("instance-6", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 3}, now.Add(-2*lookbackPeriod))}, + {what: test, shardSize: 3, expected: []string{"instance-1", "instance-2", "instance-3"}}, + // Scale up. + {what: add, instanceID: "instance-7", instanceDesc: generateRingInstanceWithInfo("instance-7", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now)}, + {what: test, shardSize: 3, expected: []string{"instance-7", "instance-2", "instance-3" /* lookback: */, "instance-1"}}, + {what: add, instanceID: "instance-8", instanceDesc: generateRingInstanceWithInfo("instance-8", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 1}, now)}, + {what: test, shardSize: 3, expected: []string{"instance-7", "instance-8", "instance-3" /* lookback: */, "instance-1", "instance-2"}}, + {what: add, instanceID: "instance-9", instanceDesc: generateRingInstanceWithInfo("instance-9", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 1}, now)}, + {what: test, shardSize: 3, expected: []string{"instance-7", "instance-8", "instance-9" /* lookback: */, "instance-1", "instance-2", "instance-3"}}, + }, + }, + "multi zone, shard size = 3, instances scale down": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 3}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-5", instanceDesc: generateRingInstanceWithInfo("instance-5", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 3}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-6", instanceDesc: generateRingInstanceWithInfo("instance-6", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 3}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-7", instanceDesc: generateRingInstanceWithInfo("instance-7", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 2}, 
now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-8", instanceDesc: generateRingInstanceWithInfo("instance-8", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 2}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-9", instanceDesc: generateRingInstanceWithInfo("instance-9", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 2}, now.Add(-2*lookbackPeriod))}, + {what: test, shardSize: 3, expected: []string{"instance-1", "instance-2", "instance-3"}}, + // Scale down. + {what: remove, instanceID: "instance-1"}, + {what: test, shardSize: 3, expected: []string{"instance-7", "instance-2", "instance-3"}}, + {what: remove, instanceID: "instance-2"}, + {what: test, shardSize: 3, expected: []string{"instance-7", "instance-8", "instance-3"}}, + {what: remove, instanceID: "instance-3"}, + {what: test, shardSize: 3, expected: []string{"instance-7", "instance-8", "instance-9"}}, + }, + }, + "multi zone, increase shard size": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 3}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-5", instanceDesc: generateRingInstanceWithInfo("instance-5", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 3}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-6", instanceDesc: generateRingInstanceWithInfo("instance-6", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 3}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-7", instanceDesc: generateRingInstanceWithInfo("instance-7", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 2}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-8", instanceDesc: generateRingInstanceWithInfo("instance-8", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 2}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-9", instanceDesc: generateRingInstanceWithInfo("instance-9", "zone-c", []uint32{userToken(userID, "zone-c", 2) + 2}, now.Add(-2*lookbackPeriod))}, + {what: test, shardSize: 3, expected: []string{"instance-1", "instance-2", "instance-3"}}, + {what: test, shardSize: 6, expected: []string{"instance-1", "instance-2", "instance-3", "instance-7", "instance-8", "instance-9"}}, + }, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + // Initialise the ring. + ringDesc := &Desc{Ingesters: map[string]InstanceDesc{}} + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ZoneAwarenessEnabled: true, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + // Replay the events on the timeline. 
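+			// After every add or remove the test rebuilds the precomputed token lookups by hand, the same bookkeeping the ring performs when it receives an updated Desc from the KV store.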
+ for _, event := range testData.timeline { + switch event.what { + case add: + ringDesc.Ingesters[event.instanceID] = event.instanceDesc + + ring.ringTokens = ringDesc.GetTokens() + ring.ringTokensByZone = ringDesc.getTokensByZone() + ring.ringInstanceByToken = ringDesc.getTokensInfo() + ring.ringZones = getZones(ringDesc.getTokensByZone()) + case remove: + delete(ringDesc.Ingesters, event.instanceID) + + ring.ringTokens = ringDesc.GetTokens() + ring.ringTokensByZone = ringDesc.getTokensByZone() + ring.ringInstanceByToken = ringDesc.getTokensInfo() + ring.ringZones = getZones(ringDesc.getTokensByZone()) + case test: + rs, err := ring.ShuffleShardWithLookback(userID, event.shardSize, lookbackPeriod, time.Now()).GetAllHealthy(Read) + require.NoError(t, err) + assert.ElementsMatch(t, event.expected, rs.GetAddresses()) + } + } + }) + } +} + +func TestRing_ShuffleShardWithLookback_CorrectnessWithFuzzy(t *testing.T) { + // The goal of this test is NOT to ensure that the minimum required number of instances + // is returned at any given time, BUT that at least all required instances are returned. + var ( + numInitialInstances = []int{9, 30, 60, 90} + numInitialZones = []int{1, 3} + numEvents = 100 + lookbackPeriod = time.Hour + delayBetweenEvents = 5 * time.Minute // 12 events / hour + userID = "user-1" + ) + + for _, numInstances := range numInitialInstances { + for _, numZones := range numInitialZones { + testName := fmt.Sprintf("num instances = %d, num zones = %d", numInstances, numZones) + + t.Run(testName, func(t *testing.T) { + // Randomise the seed but log it in case we need to reproduce the test on failure. + seed := time.Now().UnixNano() + rand.Seed(seed) + t.Log("random generator seed:", seed) + + // Initialise the ring. + ringDesc := &Desc{Ingesters: generateRingInstances(numInstances, numZones, 128)} + ring := Ring{ + cfg: Config{ + HeartbeatTimeout: time.Hour, + ZoneAwarenessEnabled: true, + ReplicationFactor: 3, + }, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + // The simulation starts with the minimum shard size. Random events can later increase it. + shardSize := numZones + + // The simulation assumes the initial ring contains instances registered + // for longer than the lookback period. + currTime := time.Now().Add(lookbackPeriod).Add(time.Minute) + + // Add the initial shard to the history. + rs, err := ring.shuffleShard(userID, shardSize, 0, time.Now()).GetReplicationSetForOperation(Read) + require.NoError(t, err) + + history := map[time.Time]ReplicationSet{ + currTime: rs, + } + + // Simulate a progression of random events over time and, at each iteration of the simulation, + // make sure the subring includes all non-removed instances picked from previous versions of the + // ring up until the lookback period. + nextInstanceID := len(ringDesc.Ingesters) + 1 + + for i := 1; i <= numEvents; i++ { + currTime = currTime.Add(delayBetweenEvents) + + switch r := rand.Intn(100); { + case r < 80: + // Scale up instances by 1.
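+					// The new instance registers with freshly generated tokens and currTime as its registration timestamp, so the lookback logic treats it as a recent addition.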
+ instanceID := fmt.Sprintf("instance-%d", nextInstanceID) + zoneID := fmt.Sprintf("zone-%d", nextInstanceID%numZones) + nextInstanceID++ + + ringDesc.Ingesters[instanceID] = generateRingInstanceWithInfo(instanceID, zoneID, GenerateTokens(128, nil), currTime) + + ring.ringTokens = ringDesc.GetTokens() + ring.ringTokensByZone = ringDesc.getTokensByZone() + ring.ringInstanceByToken = ringDesc.getTokensInfo() + ring.ringZones = getZones(ringDesc.getTokensByZone()) + case r < 90: + // Scale down instances by 1. To make tests reproducible we get the instance IDs, sort them + // and then get a random index (using the random generator initialized with a constant seed). + instanceIDs := make([]string, 0, len(ringDesc.Ingesters)) + for id := range ringDesc.Ingesters { + instanceIDs = append(instanceIDs, id) + } + + sort.Strings(instanceIDs) + + idxToRemove := rand.Intn(len(instanceIDs)) + idToRemove := instanceIDs[idxToRemove] + delete(ringDesc.Ingesters, idToRemove) + + ring.ringTokens = ringDesc.GetTokens() + ring.ringTokensByZone = ringDesc.getTokensByZone() + ring.ringInstanceByToken = ringDesc.getTokensInfo() + ring.ringZones = getZones(ringDesc.getTokensByZone()) + + // Remove the terminated instance from the history. + for ringTime, ringState := range history { + for idx, desc := range ringState.Instances { + // In this simulation instance ID == instance address. + if desc.Addr != idToRemove { + continue + } + + ringState.Instances = append(ringState.Instances[:idx], ringState.Instances[idx+1:]...) + history[ringTime] = ringState + break + } + } + default: + // Scale up shard size (keeping the per-zone balance). + shardSize += numZones + } + + // Add the current shard to the history. + rs, err = ring.shuffleShard(userID, shardSize, 0, time.Now()).GetReplicationSetForOperation(Read) + require.NoError(t, err) + history[currTime] = rs + + // Ensure the shard with lookback includes all instances from previous states of the ring. + rsWithLookback, err := ring.ShuffleShardWithLookback(userID, shardSize, lookbackPeriod, currTime).GetReplicationSetForOperation(Read) + require.NoError(t, err) + + for ringTime, ringState := range history { + if ringTime.Before(currTime.Add(-lookbackPeriod)) { + // This entry from the history is obsolete, we can remove it. 
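+						// Entries older than the lookback period can no longer influence ShuffleShardWithLookback, so dropping them keeps the check below tight.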
+ delete(history, ringTime) + continue + } + + for _, expectedAddr := range ringState.GetAddresses() { + if !rsWithLookback.Includes(expectedAddr) { + t.Fatalf( + "subring generated after event %d is expected to include instance %s from ring state at time %s but it's missing (actual instances are: %s)", + i, expectedAddr, ringTime.String(), strings.Join(rsWithLookback.GetAddresses(), ", ")) + } + } + } + } + }) + } + } +} + +func BenchmarkRing_ShuffleShard(b *testing.B) { + for _, numInstances := range []int{50, 100, 1000} { + for _, numZones := range []int{1, 3} { + for _, shardSize := range []int{3, 10, 30} { + b.Run(fmt.Sprintf("num instances = %d, num zones = %d, shard size = %d", numInstances, numZones, shardSize), func(b *testing.B) { + benchmarkShuffleSharding(b, numInstances, numZones, 128, shardSize, false) + }) + } + } + } +} + +func BenchmarkRing_ShuffleShardCached(b *testing.B) { + for _, numInstances := range []int{50, 100, 1000} { + for _, numZones := range []int{1, 3} { + for _, shardSize := range []int{3, 10, 30} { + b.Run(fmt.Sprintf("num instances = %d, num zones = %d, shard size = %d", numInstances, numZones, shardSize), func(b *testing.B) { + benchmarkShuffleSharding(b, numInstances, numZones, 128, shardSize, true) + }) + } + } + } +} + +func BenchmarkRing_ShuffleShard_512Tokens(b *testing.B) { + const ( + numInstances = 30 + numZones = 3 + numTokens = 512 + shardSize = 9 + cacheEnabled = false + ) + + benchmarkShuffleSharding(b, numInstances, numZones, numTokens, shardSize, cacheEnabled) +} + +func benchmarkShuffleSharding(b *testing.B, numInstances, numZones, numTokens, shardSize int, cache bool) { + // Initialise the ring. + ringDesc := &Desc{Ingesters: generateRingInstances(numInstances, numZones, numTokens)} + ring := Ring{ + cfg: Config{HeartbeatTimeout: time.Hour, ZoneAwarenessEnabled: true, SubringCacheDisabled: !cache}, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + shuffledSubringCache: map[subringCacheKey]*Ring{}, + strategy: NewDefaultReplicationStrategy(), + lastTopologyChange: time.Now(), + } + + b.ResetTimer() + + for n := 0; n < b.N; n++ { + ring.ShuffleShard("tenant-1", shardSize) + } +} + +func BenchmarkRing_Get(b *testing.B) { + const ( + numInstances = 100 + numZones = 3 + replicationFactor = 3 + ) + + // Initialise the ring. + ringDesc := &Desc{Ingesters: generateRingInstances(numInstances, numZones, numTokens)} + ring := Ring{ + cfg: Config{HeartbeatTimeout: time.Hour, ZoneAwarenessEnabled: true, SubringCacheDisabled: true, ReplicationFactor: replicationFactor}, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + shuffledSubringCache: map[subringCacheKey]*Ring{}, + strategy: NewDefaultReplicationStrategy(), + lastTopologyChange: time.Now(), + } + + buf, bufHosts, bufZones := MakeBuffersForGet() + r := rand.New(rand.NewSource(time.Now().UnixNano())) + + b.ResetTimer() + + for n := 0; n < b.N; n++ { + set, err := ring.Get(r.Uint32(), Write, buf, bufHosts, bufZones) + if err != nil || len(set.Instances) != replicationFactor { + b.Fatal() + } + } +} + +func TestRing_Get_NoMemoryAllocations(t *testing.T) { + // Initialise the ring. 
+ ringDesc := &Desc{Ingesters: generateRingInstances(3, 3, 128)} + ring := Ring{ + cfg: Config{HeartbeatTimeout: time.Hour, ZoneAwarenessEnabled: true, SubringCacheDisabled: true, ReplicationFactor: 3}, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + shuffledSubringCache: map[subringCacheKey]*Ring{}, + strategy: NewDefaultReplicationStrategy(), + lastTopologyChange: time.Now(), + } + + buf, bufHosts, bufZones := MakeBuffersForGet() + r := rand.New(rand.NewSource(time.Now().UnixNano())) + + numAllocs := testing.AllocsPerRun(10, func() { + set, err := ring.Get(r.Uint32(), Write, buf, bufHosts, bufZones) + if err != nil || len(set.Instances) != 3 { + t.Fail() + } + }) + + assert.Equal(t, float64(0), numAllocs) +} + +// generateTokensLinear returns tokens with a linear distribution. +func generateTokensLinear(instanceID, numInstances, numTokens int) []uint32 { + tokens := make([]uint32, 0, numTokens) + step := math.MaxUint32 / numTokens + offset := (step / numInstances) * instanceID + + for t := offset; t <= math.MaxUint32; t += step { + tokens = append(tokens, uint32(t)) + } + + return tokens +} + +func generateRingInstances(numInstances, numZones, numTokens int) map[string]InstanceDesc { + instances := make(map[string]InstanceDesc, numInstances) + + for i := 1; i <= numInstances; i++ { + id, desc := generateRingInstance(i, i%numZones, numTokens) + instances[id] = desc + } + + return instances +} + +func generateRingInstance(id, zone, numTokens int) (string, InstanceDesc) { + instanceID := fmt.Sprintf("instance-%d", id) + zoneID := fmt.Sprintf("zone-%d", zone) + + return instanceID, generateRingInstanceWithInfo(instanceID, zoneID, GenerateTokens(numTokens, nil), time.Now()) +} + +func generateRingInstanceWithInfo(addr, zone string, tokens []uint32, registeredAt time.Time) InstanceDesc { + return InstanceDesc{ + Addr: addr, + Timestamp: time.Now().Unix(), + RegisteredTimestamp: registeredAt.Unix(), + State: ACTIVE, + Tokens: tokens, + Zone: zone, + } +} + +// compareReplicationSets returns the list of instance addresses which differ between the two sets. +func compareReplicationSets(first, second ReplicationSet) (added, removed []string) { + for _, instance := range first.Instances { + if !second.Includes(instance.Addr) { + added = append(added, instance.Addr) + } + } + + for _, instance := range second.Instances { + if !first.Includes(instance.Addr) { + removed = append(removed, instance.Addr) + } + } + + return +} + +// This test verifies that ring is getting updates, even after extending check in the loop method. 
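+// It starts lifecyclers to register instances (optionally excluding zones), polls the ring client for the
+// expected instance count and fresh heartbeat timestamps, and then checks the count drops back to zero
+// once the lifecyclers are stopped.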
+func TestRingUpdates(t *testing.T) { + const ( + numInstances = 3 + numZones = 3 + ) + + tests := map[string]struct { + excludedZones []string + expectedInstances int + }{ + "without excluded zones": { + expectedInstances: 3, + }, + "with excluded zones": { + excludedZones: []string{"zone-0"}, + expectedInstances: 2, + }, + } + + for testName, testData := range tests { + t.Run(testName, func(t *testing.T) { + inmem, closer := consul.NewInMemoryClient(GetCodec(), log.NewNopLogger(), nil) + t.Cleanup(func() { assert.NoError(t, closer.Close()) }) + + cfg := Config{ + KVStore: kv.Config{Mock: inmem}, + HeartbeatTimeout: 1 * time.Minute, + ReplicationFactor: 3, + ExcludedZones: flagext.StringSliceCSV(testData.excludedZones), + } + + ring, err := New(cfg, "test", "test", log.NewNopLogger(), nil) + require.NoError(t, err) + require.NoError(t, services.StartAndAwaitRunning(context.Background(), ring)) + t.Cleanup(func() { + _ = services.StopAndAwaitTerminated(context.Background(), ring) + }) + + require.Equal(t, 0, ring.InstancesCount()) + + // Start 1 lifecycler for each instance we want to register in the ring. + var lifecyclers []*Lifecycler + for instanceID := 1; instanceID <= numInstances; instanceID++ { + lifecyclers = append(lifecyclers, startLifecycler(t, cfg, 100*time.Millisecond, instanceID, numZones)) + } + + // Ensure the ring client got updated. + test.Poll(t, 1*time.Second, testData.expectedInstances, func() interface{} { + return ring.InstancesCount() + }) + + // Sleep for a few seconds (ring timestamp resolution is 1 second, so to verify that ring is updated in the background, + // sleep for 2 seconds) + time.Sleep(2 * time.Second) + + rs, err := ring.GetAllHealthy(Read) + require.NoError(t, err) + + now := time.Now() + for _, ing := range rs.Instances { + require.InDelta(t, now.UnixNano(), time.Unix(ing.Timestamp, 0).UnixNano(), float64(1500*time.Millisecond.Nanoseconds())) + + // Ensure there's no instance in an excluded zone. + if len(testData.excludedZones) > 0 { + assert.False(t, util.StringsContain(testData.excludedZones, ing.Zone)) + } + } + + // Stop all lifecyclers. + for _, lc := range lifecyclers { + require.NoError(t, services.StopAndAwaitTerminated(context.Background(), lc)) + } + + // Ensure the ring client got updated. + test.Poll(t, 1*time.Second, 0, func() interface{} { + return ring.InstancesCount() + }) + }) + } +} + +func startLifecycler(t *testing.T, cfg Config, heartbeat time.Duration, lifecyclerID int, zones int) *Lifecycler { + lcCfg := LifecyclerConfig{ + RingConfig: cfg, + NumTokens: 16, + HeartbeatPeriod: heartbeat, + ObservePeriod: 0, + JoinAfter: 0, + Zone: fmt.Sprintf("zone-%d", lifecyclerID%zones), + Addr: fmt.Sprintf("addr-%d", lifecyclerID), + ID: fmt.Sprintf("instance-%d", lifecyclerID), + UnregisterOnShutdown: true, + } + + lc, err := NewLifecycler(lcCfg, &noopFlushTransferer{}, "test", "test", false, log.NewNopLogger(), nil) + require.NoError(t, err) + + lc.AddListener(services.NewListener(nil, nil, nil, nil, func(from services.State, failure error) { + t.Log("lifecycler", lifecyclerID, "failed:", failure) + t.Fail() + })) + + require.NoError(t, services.StartAndAwaitRunning(context.Background(), lc)) + + t.Cleanup(func() { + _ = services.StopAndAwaitTerminated(context.Background(), lc) + }) + + return lc +} + +// This test checks if shuffle-sharded ring can be reused, and whether it receives +// updates from "main" ring. 
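+// The same cached subring must be returned while the ring is stable; stopping instances, changing the
+// shard size, or cleaning up the cache must produce a new subring.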
+func TestShuffleShardWithCaching(t *testing.T) {
+ inmem, closer := consul.NewInMemoryClientWithConfig(GetCodec(), consul.Config{
+ MaxCasRetries: 20,
+ CasRetryDelay: 500 * time.Millisecond,
+ }, log.NewNopLogger(), nil)
+ t.Cleanup(func() { assert.NoError(t, closer.Close()) })
+
+ cfg := Config{
+ KVStore: kv.Config{Mock: inmem},
+ HeartbeatTimeout: 1 * time.Minute,
+ ReplicationFactor: 3,
+ ZoneAwarenessEnabled: true,
+ }
+
+ ring, err := New(cfg, "test", "test", log.NewNopLogger(), nil)
+ require.NoError(t, err)
+ require.NoError(t, services.StartAndAwaitRunning(context.Background(), ring))
+ t.Cleanup(func() {
+ _ = services.StopAndAwaitTerminated(context.Background(), ring)
+ })
+
+ // We will stop instances later, to see that subring is recomputed.
+ const numLifecyclers = 6
+ const zones = 3
+
+ lcs := []*Lifecycler(nil)
+ for i := 0; i < numLifecyclers; i++ {
+ lc := startLifecycler(t, cfg, 500*time.Millisecond, i, zones)
+
+ lcs = append(lcs, lc)
+ }
+
+ // Wait until all instances in the ring are ACTIVE.
+ test.Poll(t, 5*time.Second, numLifecyclers, func() interface{} {
+ active := 0
+ rs, _ := ring.GetReplicationSetForOperation(Read)
+ for _, ing := range rs.Instances {
+ if ing.State == ACTIVE {
+ active++
+ }
+ }
+ return active
+ })
+
+ // Use shardSize = zones, to get one instance from each zone.
+ const shardSize = zones
+ const user = "user"
+
+ // This subring should be cached, and reused.
+ subring := ring.ShuffleShard(user, shardSize)
+
+ // Do 100 iterations over two seconds. Make sure we get the same subring.
+ const iters = 100
+ sleep := (2 * time.Second) / iters
+ for i := 0; i < iters; i++ {
+ newSubring := ring.ShuffleShard(user, shardSize)
+ require.True(t, subring == newSubring, "cached subring reused")
+ require.Equal(t, shardSize, subring.InstancesCount())
+ time.Sleep(sleep)
+ }
+
+ // Make sure subring has up-to-date timestamps.
+ {
+ rs, err := subring.GetReplicationSetForOperation(Read)
+ require.NoError(t, err)
+
+ now := time.Now()
+ for _, ing := range rs.Instances {
+ // Lifecyclers use 500ms refresh, but timestamps use 1s resolution, so we give it some extra buffer.
+ assert.InDelta(t, now.UnixNano(), time.Unix(ing.Timestamp, 0).UnixNano(), float64(2*time.Second.Nanoseconds()))
+ }
+ }
+
+ // Now stop one lifecycler from each zone. Subring needs to be recomputed.
+ for i := 0; i < zones; i++ {
+ require.NoError(t, services.StopAndAwaitTerminated(context.Background(), lcs[i]))
+ }
+
+ test.Poll(t, 5*time.Second, numLifecyclers-zones, func() interface{} {
+ return ring.InstancesCount()
+ })
+
+ // Change of instances -> new subring needed.
+ newSubring := ring.ShuffleShard("user", zones)
+ require.False(t, subring == newSubring)
+ require.Equal(t, zones, subring.InstancesCount())
+
+ // Change of shard size -> new subring needed.
+ subring = newSubring
+ newSubring = ring.ShuffleShard("user", 1)
+ require.False(t, subring == newSubring)
+ // Zone-aware shuffle-shard gives all zones the same number of instances (at least one).
+ require.Equal(t, zones, newSubring.InstancesCount())
+
+ // Verify that getting the same subring again returns the cached instance.
+ subring = newSubring
+ newSubring = ring.ShuffleShard("user", 1)
+ require.True(t, subring == newSubring)
+
+ // But after cleanup, it doesn't.
+ ring.CleanupShuffleShardCache("user")
+ newSubring = ring.ShuffleShard("user", 1)
+ require.False(t, subring == newSubring)
+}
+
+// User shuffle shard token.
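+// It skips the first `skip` values of the random sequence seeded with shard.ShuffleShardSeed(user, zone)
+// and returns the next one, so tests can predict which tokens are looked up for a given user and zone.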
+func userToken(user, zone string, skip int) uint32 { + r := rand.New(rand.NewSource(shard.ShuffleShardSeed(user, zone))) + + for ; skip > 0; skip-- { + _ = r.Uint32() + } + return r.Uint32() +} + +func TestUpdateMetrics(t *testing.T) { + cfg := Config{ + KVStore: kv.Config{}, + HeartbeatTimeout: 0, // get healthy stats + ReplicationFactor: 3, + ZoneAwarenessEnabled: true, + } + + registry := prometheus.NewRegistry() + + // create the ring to set up metrics, but do not start + ring, err := NewWithStoreClientAndStrategy(cfg, testRingName, testRingKey, nil, NewDefaultReplicationStrategy(), registry, log.NewNopLogger()) + require.NoError(t, err) + + ringDesc := Desc{ + Ingesters: map[string]InstanceDesc{ + "A": {Addr: "127.0.0.1", Timestamp: 22, Tokens: []uint32{math.MaxUint32 / 4, (math.MaxUint32 / 4) * 3}}, + "B": {Addr: "127.0.0.2", Timestamp: 11, Tokens: []uint32{(math.MaxUint32 / 4) * 2, math.MaxUint32}}, + }, + } + ring.updateRingState(&ringDesc) + + err = testutil.GatherAndCompare(registry, bytes.NewBufferString(` + # HELP ring_member_ownership_percent The percent ownership of the ring by member + # TYPE ring_member_ownership_percent gauge + ring_member_ownership_percent{member="A",name="test"} 0.500000000349246 + ring_member_ownership_percent{member="B",name="test"} 0.49999999965075403 + # HELP ring_members Number of members in the ring + # TYPE ring_members gauge + ring_members{name="test",state="ACTIVE"} 2 + ring_members{name="test",state="JOINING"} 0 + ring_members{name="test",state="LEAVING"} 0 + ring_members{name="test",state="PENDING"} 0 + ring_members{name="test",state="Unhealthy"} 0 + # HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring. + # TYPE ring_oldest_member_timestamp gauge + ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11 + ring_oldest_member_timestamp{name="test",state="JOINING"} 0 + ring_oldest_member_timestamp{name="test",state="LEAVING"} 0 + ring_oldest_member_timestamp{name="test",state="PENDING"} 0 + ring_oldest_member_timestamp{name="test",state="Unhealthy"} 0 + # HELP ring_tokens_owned The number of tokens in the ring owned by the member + # TYPE ring_tokens_owned gauge + ring_tokens_owned{member="A",name="test"} 2 + ring_tokens_owned{member="B",name="test"} 2 + # HELP ring_tokens_total Number of tokens in the ring + # TYPE ring_tokens_total gauge + ring_tokens_total{name="test"} 4 + `)) + assert.NoError(t, err) +} + +func TestUpdateMetricsWithRemoval(t *testing.T) { + cfg := Config{ + KVStore: kv.Config{}, + HeartbeatTimeout: 0, // get healthy stats + ReplicationFactor: 3, + ZoneAwarenessEnabled: true, + } + + registry := prometheus.NewRegistry() + + // create the ring to set up metrics, but do not start + ring, err := NewWithStoreClientAndStrategy(cfg, testRingName, testRingKey, nil, NewDefaultReplicationStrategy(), registry, log.NewNopLogger()) + require.NoError(t, err) + + ringDesc := Desc{ + Ingesters: map[string]InstanceDesc{ + "A": {Addr: "127.0.0.1", Timestamp: 22, Tokens: []uint32{math.MaxUint32 / 4, (math.MaxUint32 / 4) * 3}}, + "B": {Addr: "127.0.0.2", Timestamp: 11, Tokens: []uint32{(math.MaxUint32 / 4) * 2, math.MaxUint32}}, + }, + } + ring.updateRingState(&ringDesc) + + err = testutil.GatherAndCompare(registry, bytes.NewBufferString(` + # HELP ring_member_ownership_percent The percent ownership of the ring by member + # TYPE ring_member_ownership_percent gauge + ring_member_ownership_percent{member="A",name="test"} 0.500000000349246 + ring_member_ownership_percent{member="B",name="test"} 0.49999999965075403 + 
# HELP ring_members Number of members in the ring + # TYPE ring_members gauge + ring_members{name="test",state="ACTIVE"} 2 + ring_members{name="test",state="JOINING"} 0 + ring_members{name="test",state="LEAVING"} 0 + ring_members{name="test",state="PENDING"} 0 + ring_members{name="test",state="Unhealthy"} 0 + # HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring. + # TYPE ring_oldest_member_timestamp gauge + ring_oldest_member_timestamp{name="test",state="ACTIVE"} 11 + ring_oldest_member_timestamp{name="test",state="JOINING"} 0 + ring_oldest_member_timestamp{name="test",state="LEAVING"} 0 + ring_oldest_member_timestamp{name="test",state="PENDING"} 0 + ring_oldest_member_timestamp{name="test",state="Unhealthy"} 0 + # HELP ring_tokens_owned The number of tokens in the ring owned by the member + # TYPE ring_tokens_owned gauge + ring_tokens_owned{member="A",name="test"} 2 + ring_tokens_owned{member="B",name="test"} 2 + # HELP ring_tokens_total Number of tokens in the ring + # TYPE ring_tokens_total gauge + ring_tokens_total{name="test"} 4 + `)) + require.NoError(t, err) + + ringDescNew := Desc{ + Ingesters: map[string]InstanceDesc{ + "A": {Addr: "127.0.0.1", Timestamp: 22, Tokens: []uint32{math.MaxUint32 / 4, (math.MaxUint32 / 4) * 3}}, + }, + } + ring.updateRingState(&ringDescNew) + + err = testutil.GatherAndCompare(registry, bytes.NewBufferString(` + # HELP ring_member_ownership_percent The percent ownership of the ring by member + # TYPE ring_member_ownership_percent gauge + ring_member_ownership_percent{member="A",name="test"} 1 + # HELP ring_members Number of members in the ring + # TYPE ring_members gauge + ring_members{name="test",state="ACTIVE"} 1 + ring_members{name="test",state="JOINING"} 0 + ring_members{name="test",state="LEAVING"} 0 + ring_members{name="test",state="PENDING"} 0 + ring_members{name="test",state="Unhealthy"} 0 + # HELP ring_oldest_member_timestamp Timestamp of the oldest member in the ring. 
+ # TYPE ring_oldest_member_timestamp gauge + ring_oldest_member_timestamp{name="test",state="ACTIVE"} 22 + ring_oldest_member_timestamp{name="test",state="JOINING"} 0 + ring_oldest_member_timestamp{name="test",state="LEAVING"} 0 + ring_oldest_member_timestamp{name="test",state="PENDING"} 0 + ring_oldest_member_timestamp{name="test",state="Unhealthy"} 0 + # HELP ring_tokens_owned The number of tokens in the ring owned by the member + # TYPE ring_tokens_owned gauge + ring_tokens_owned{member="A",name="test"} 2 + # HELP ring_tokens_total Number of tokens in the ring + # TYPE ring_tokens_total gauge + ring_tokens_total{name="test"} 2 + `)) + assert.NoError(t, err) +} diff --git a/vendor/github.com/grafana/dskit/ring/shard/shard.go b/pkg/ring/shard/shard.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/shard/shard.go rename to pkg/ring/shard/shard.go diff --git a/pkg/ring/shard/shard_test.go b/pkg/ring/shard/shard_test.go new file mode 100644 index 00000000000..f23260bbc8b --- /dev/null +++ b/pkg/ring/shard/shard_test.go @@ -0,0 +1,90 @@ +package shard + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestShuffleShardExpectedInstancesPerZone(t *testing.T) { + tests := []struct { + shardSize int + numZones int + expected int + }{ + { + shardSize: 1, + numZones: 1, + expected: 1, + }, + { + shardSize: 1, + numZones: 3, + expected: 1, + }, + { + shardSize: 3, + numZones: 3, + expected: 1, + }, + { + shardSize: 4, + numZones: 3, + expected: 2, + }, + { + shardSize: 6, + numZones: 3, + expected: 2, + }, + } + + for _, test := range tests { + assert.Equal(t, test.expected, ShuffleShardExpectedInstancesPerZone(test.shardSize, test.numZones)) + } +} + +func TestShuffleShardExpectedInstances(t *testing.T) { + tests := []struct { + shardSize int + numZones int + expected int + }{ + { + shardSize: 1, + numZones: 1, + expected: 1, + }, + { + shardSize: 1, + numZones: 3, + expected: 3, + }, + { + shardSize: 3, + numZones: 3, + expected: 3, + }, + { + shardSize: 4, + numZones: 3, + expected: 6, + }, + { + shardSize: 6, + numZones: 3, + expected: 6, + }, + } + + for _, test := range tests { + assert.Equal(t, test.expected, ShuffleShardExpectedInstances(test.shardSize, test.numZones)) + } +} + +func TestYoloBuf(t *testing.T) { + s := yoloBuf("hello world") + + require.Equal(t, []byte("hello world"), s) +} diff --git a/vendor/github.com/grafana/dskit/ring/ticker.go b/pkg/ring/ticker.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/ticker.go rename to pkg/ring/ticker.go diff --git a/pkg/ring/ticker_test.go b/pkg/ring/ticker_test.go new file mode 100644 index 00000000000..81fe6cf2149 --- /dev/null +++ b/pkg/ring/ticker_test.go @@ -0,0 +1,34 @@ +package ring + +import ( + "testing" + "time" +) + +func TestNewDisableableTicker_Enabled(t *testing.T) { + stop, ch := newDisableableTicker(10 * time.Millisecond) + defer stop() + + time.Sleep(100 * time.Millisecond) + + select { + case <-ch: + break + default: + t.Error("ticker should have ticked when enabled") + } +} + +func TestNewDisableableTicker_Disabled(t *testing.T) { + stop, ch := newDisableableTicker(0) + defer stop() + + time.Sleep(100 * time.Millisecond) + + select { + case <-ch: + t.Error("ticker should not have ticked when disabled") + default: + break + } +} diff --git a/vendor/github.com/grafana/dskit/ring/tokens.go b/pkg/ring/tokens.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/tokens.go rename to 
pkg/ring/tokens.go diff --git a/pkg/ring/tokens_test.go b/pkg/ring/tokens_test.go new file mode 100644 index 00000000000..3639f192871 --- /dev/null +++ b/pkg/ring/tokens_test.go @@ -0,0 +1,71 @@ +package ring + +import ( + "math/rand" + "path/filepath" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTokens_Serialization(t *testing.T) { + tokens := make(Tokens, 512) + for i := 0; i < 512; i++ { + tokens = append(tokens, uint32(rand.Int31())) + } + + b, err := tokens.Marshal() + require.NoError(t, err) + + var unmarshaledTokens Tokens + require.NoError(t, unmarshaledTokens.Unmarshal(b)) + require.Equal(t, tokens, unmarshaledTokens) +} + +func TestTokens_Equals(t *testing.T) { + tests := []struct { + first Tokens + second Tokens + expected bool + }{ + { + first: Tokens{}, + second: Tokens{}, + expected: true, + }, + { + first: Tokens{1, 2, 3}, + second: Tokens{1, 2, 3}, + expected: true, + }, + { + first: Tokens{1, 2, 3}, + second: Tokens{3, 2, 1}, + expected: true, + }, + { + first: Tokens{1, 2}, + second: Tokens{1, 2, 3}, + expected: false, + }, + } + + for _, c := range tests { + assert.Equal(t, c.expected, c.first.Equals(c.second)) + assert.Equal(t, c.expected, c.second.Equals(c.first)) + } +} + +func TestLoadTokensFromFile_ShouldGuaranteeSortedTokens(t *testing.T) { + tmpDir := t.TempDir() + + // Store tokens to file. + orig := Tokens{1, 5, 3} + require.NoError(t, orig.StoreToFile(filepath.Join(tmpDir, "tokens"))) + + // Read back and ensure they're sorted. + actual, err := LoadTokensFromFile(filepath.Join(tmpDir, "tokens")) + require.NoError(t, err) + assert.Equal(t, Tokens{1, 3, 5}, actual) +} diff --git a/vendor/github.com/grafana/dskit/ring/util.go b/pkg/ring/util.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/util.go rename to pkg/ring/util.go diff --git a/vendor/github.com/grafana/dskit/ring/util/string_utils.go b/pkg/ring/util/string_utils.go similarity index 100% rename from vendor/github.com/grafana/dskit/ring/util/string_utils.go rename to pkg/ring/util/string_utils.go diff --git a/pkg/ring/util_test.go b/pkg/ring/util_test.go new file mode 100644 index 00000000000..8a4563e5dcb --- /dev/null +++ b/pkg/ring/util_test.go @@ -0,0 +1,417 @@ +package ring + +import ( + "context" + "fmt" + "testing" + "time" + + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" +) + +type RingMock struct { + mock.Mock +} + +func (r *RingMock) Collect(ch chan<- prometheus.Metric) {} + +func (r *RingMock) Describe(ch chan<- *prometheus.Desc) {} + +func (r *RingMock) Get(key uint32, op Operation, bufDescs []InstanceDesc, bufHosts, bufZones []string) (ReplicationSet, error) { + args := r.Called(key, op, bufDescs, bufHosts, bufZones) + return args.Get(0).(ReplicationSet), args.Error(1) +} + +func (r *RingMock) GetAllHealthy(op Operation) (ReplicationSet, error) { + args := r.Called(op) + return args.Get(0).(ReplicationSet), args.Error(1) +} + +func (r *RingMock) GetReplicationSetForOperation(op Operation) (ReplicationSet, error) { + args := r.Called(op) + return args.Get(0).(ReplicationSet), args.Error(1) +} + +func (r *RingMock) ReplicationFactor() int { + return 0 +} + +func (r *RingMock) InstancesCount() int { + return 0 +} + +func (r *RingMock) ShuffleShard(identifier string, size int) ReadRing { + args := r.Called(identifier, size) + return args.Get(0).(ReadRing) +} + +func (r 
*RingMock) GetInstanceState(instanceID string) (InstanceState, error) { + args := r.Called(instanceID) + return args.Get(0).(InstanceState), args.Error(1) +} + +func (r *RingMock) ShuffleShardWithLookback(identifier string, size int, lookbackPeriod time.Duration, now time.Time) ReadRing { + args := r.Called(identifier, size, lookbackPeriod, now) + return args.Get(0).(ReadRing) +} + +func (r *RingMock) HasInstance(instanceID string) bool { + return true +} + +func (r *RingMock) CleanupShuffleShardCache(identifier string) {} + +func TestGenerateTokens(t *testing.T) { + tokens := GenerateTokens(1000000, nil) + + dups := make(map[uint32]int) + + for ix, v := range tokens { + if ox, ok := dups[v]; ok { + t.Errorf("Found duplicate token %d, tokens[%d]=%d, tokens[%d]=%d", v, ix, tokens[ix], ox, tokens[ox]) + } else { + dups[v] = ix + } + } +} + +func TestGenerateTokens_IgnoresOldTokens(t *testing.T) { + first := GenerateTokens(1000000, nil) + second := GenerateTokens(1000000, first) + + dups := make(map[uint32]bool) + + for _, v := range first { + dups[v] = true + } + + for _, v := range second { + if dups[v] { + t.Fatal("GenerateTokens returned old token") + } + } +} + +func createStartingRing() *Ring { + // Init the ring. + ringDesc := &Desc{Ingesters: map[string]InstanceDesc{ + "instance-1": {Addr: "127.0.0.1", State: ACTIVE, Timestamp: time.Now().Unix()}, + "instance-2": {Addr: "127.0.0.2", State: PENDING, Timestamp: time.Now().Unix()}, + "instance-3": {Addr: "127.0.0.3", State: JOINING, Timestamp: time.Now().Unix()}, + "instance-4": {Addr: "127.0.0.4", State: LEAVING, Timestamp: time.Now().Unix()}, + "instance-5": {Addr: "127.0.0.5", State: ACTIVE, Timestamp: time.Now().Unix()}, + }} + + ring := &Ring{ + cfg: Config{HeartbeatTimeout: time.Minute}, + ringDesc: ringDesc, + ringTokens: ringDesc.GetTokens(), + ringTokensByZone: ringDesc.getTokensByZone(), + ringInstanceByToken: ringDesc.getTokensInfo(), + ringZones: getZones(ringDesc.getTokensByZone()), + strategy: NewDefaultReplicationStrategy(), + } + + return ring +} + +func TestWaitRingStability_ShouldReturnAsSoonAsMinStabilityIsReachedOnNoChanges(t *testing.T) { + t.Parallel() + + const ( + minStability = 2 * time.Second + maxWaiting = 10 * time.Second + ) + + ring := createStartingRing() + + startTime := time.Now() + require.NoError(t, WaitRingStability(context.Background(), ring, Reporting, minStability, maxWaiting)) + elapsedTime := time.Since(startTime) + + assert.GreaterOrEqual(t, elapsedTime, minStability) + assert.Less(t, elapsedTime, 2*minStability) +} + +func TestWaitRingTokensStability_ShouldReturnAsSoonAsMinStabilityIsReachedOnNoChanges(t *testing.T) { + t.Parallel() + + const ( + minStability = 2 * time.Second + maxWaiting = 10 * time.Second + ) + + ring := createStartingRing() + + startTime := time.Now() + require.NoError(t, WaitRingTokensStability(context.Background(), ring, Reporting, minStability, maxWaiting)) + elapsedTime := time.Since(startTime) + + assert.GreaterOrEqual(t, elapsedTime, minStability) + assert.Less(t, elapsedTime, 2*minStability) +} + +func addInstanceAfterSomeTime(ring *Ring, addInstanceAfter time.Duration) { + go func() { + time.Sleep(addInstanceAfter) + + ring.mtx.Lock() + defer ring.mtx.Unlock() + ringDesc := ring.ringDesc + instanceID := fmt.Sprintf("127.0.0.%d", len(ringDesc.Ingesters)+1) + ringDesc.Ingesters[instanceID] = InstanceDesc{Addr: instanceID, State: ACTIVE, Timestamp: time.Now().Unix()} + ring.ringDesc = ringDesc + ring.ringTokens = ringDesc.GetTokens() + ring.ringTokensByZone = 
ringDesc.getTokensByZone() + ring.ringInstanceByToken = ringDesc.getTokensInfo() + ring.ringZones = getZones(ringDesc.getTokensByZone()) + }() +} + +func TestWaitRingStability_ShouldReturnOnceMinStabilityOfInstancesHasBeenReached(t *testing.T) { + t.Parallel() + + const ( + minStability = 3 * time.Second + addInstanceAfter = 2 * time.Second + maxWaiting = 15 * time.Second + ) + + ring := createStartingRing() + + // Add 1 new instance after some time. + addInstanceAfterSomeTime(ring, addInstanceAfter) + + startTime := time.Now() + require.NoError(t, WaitRingStability(context.Background(), ring, Reporting, minStability, maxWaiting)) + elapsedTime := time.Since(startTime) + + assert.GreaterOrEqual(t, elapsedTime, minStability+addInstanceAfter) + assert.LessOrEqual(t, elapsedTime, minStability+addInstanceAfter+3*time.Second) +} + +func TestWaitRingTokensStability_ShouldReturnOnceMinStabilityOfInstancesHasBeenReached(t *testing.T) { + t.Parallel() + + const ( + minStability = 3 * time.Second + addInstanceAfter = 2 * time.Second + maxWaiting = 15 * time.Second + ) + + ring := createStartingRing() + + // Add 1 new instance after some time. + addInstanceAfterSomeTime(ring, addInstanceAfter) + + startTime := time.Now() + require.NoError(t, WaitRingTokensStability(context.Background(), ring, Reporting, minStability, maxWaiting)) + elapsedTime := time.Since(startTime) + + assert.GreaterOrEqual(t, elapsedTime, minStability+addInstanceAfter) + assert.LessOrEqual(t, elapsedTime, minStability+addInstanceAfter+3*time.Second) +} + +func addInstancesPeriodically(ring *Ring) chan struct{} { + // Keep changing the ring. + done := make(chan struct{}) + + go func() { + for { + select { + case <-done: + return + case <-time.After(time.Second): + ring.mtx.Lock() + ringDesc := ring.ringDesc + instanceID := fmt.Sprintf("127.0.0.%d", len(ringDesc.Ingesters)+1) + ringDesc.Ingesters[instanceID] = InstanceDesc{Addr: instanceID, State: ACTIVE, Timestamp: time.Now().Unix()} + ring.ringDesc = ringDesc + ring.ringTokens = ringDesc.GetTokens() + ring.ringTokensByZone = ringDesc.getTokensByZone() + ring.ringInstanceByToken = ringDesc.getTokensInfo() + ring.ringZones = getZones(ringDesc.getTokensByZone()) + + ring.mtx.Unlock() + } + } + }() + return done +} + +func TestWaitRingStability_ShouldReturnErrorIfInstancesAddedAndMaxWaitingIsReached(t *testing.T) { + t.Parallel() + + const ( + minStability = 2 * time.Second + maxWaiting = 7 * time.Second + ) + + ring := createStartingRing() + + done := addInstancesPeriodically(ring) + defer close(done) + + startTime := time.Now() + require.Equal(t, context.DeadlineExceeded, WaitRingStability(context.Background(), ring, Reporting, minStability, maxWaiting)) + elapsedTime := time.Since(startTime) + + assert.GreaterOrEqual(t, elapsedTime, maxWaiting) +} + +func TestWaitRingTokensStability_ShouldReturnErrorIfInstancesAddedAndMaxWaitingIsReached(t *testing.T) { + t.Parallel() + + const ( + minStability = 2 * time.Second + maxWaiting = 7 * time.Second + ) + + ring := createStartingRing() + + done := addInstancesPeriodically(ring) + defer close(done) + + startTime := time.Now() + require.Equal(t, context.DeadlineExceeded, WaitRingTokensStability(context.Background(), ring, Reporting, minStability, maxWaiting)) + elapsedTime := time.Since(startTime) + + assert.GreaterOrEqual(t, elapsedTime, maxWaiting) +} + +// Keep changing the ring in a way to avoid repeating the same set of states for at least 2 sec +func changeStatePeriodically(ring *Ring) chan struct{} { + done := make(chan struct{}) + 
go func() { + instanceToMutate := "instance-1" + states := []InstanceState{PENDING, JOINING, ACTIVE, LEAVING} + stateIdx := 0 + + for states[stateIdx] != ring.ringDesc.Ingesters[instanceToMutate].State { + stateIdx++ + } + + for { + select { + case <-done: + return + case <-time.After(time.Second): + stateIdx++ + ring.mtx.Lock() + ringDesc := ring.ringDesc + desc := InstanceDesc{Addr: "127.0.0.1", State: states[stateIdx%len(states)], Timestamp: time.Now().Unix()} + ringDesc.Ingesters[instanceToMutate] = desc + ring.mtx.Unlock() + } + } + }() + + return done +} + +func TestWaitRingStability_ShouldReturnErrorIfInstanceStateIsChangingAndMaxWaitingIsReached(t *testing.T) { + t.Parallel() + + const ( + minStability = 2 * time.Second + maxWaiting = 7 * time.Second + ) + + ring := createStartingRing() + + // Keep changing the ring. + done := changeStatePeriodically(ring) + defer close(done) + + startTime := time.Now() + require.Equal(t, context.DeadlineExceeded, WaitRingStability(context.Background(), ring, Reporting, minStability, maxWaiting)) + elapsedTime := time.Since(startTime) + + assert.GreaterOrEqual(t, elapsedTime, maxWaiting) +} + +func TestWaitRingTokensStability_ShouldReturnOnceMinStabilityOfInstancesHasBeenReachedWhileStateCanChange(t *testing.T) { + t.Parallel() + + const ( + minStability = 2 * time.Second + maxWaiting = 7 * time.Second + ) + + ring := createStartingRing() + + // Keep changing the ring. + done := changeStatePeriodically(ring) + defer close(done) + + startTime := time.Now() + require.NoError(t, WaitRingTokensStability(context.Background(), ring, Reporting, minStability, maxWaiting)) + elapsedTime := time.Since(startTime) + + assert.GreaterOrEqual(t, elapsedTime, minStability) + assert.Less(t, elapsedTime, 2*minStability) +} + +func TestWaitInstanceState_Timeout(t *testing.T) { + t.Parallel() + + const ( + instanceID = "test" + timeoutDuration = time.Second + ) + + ctx, cancel := context.WithTimeout(context.Background(), timeoutDuration) + defer cancel() + + ring := &RingMock{} + ring.On("GetInstanceState", mock.Anything, mock.Anything).Return(ACTIVE, nil) + + err := WaitInstanceState(ctx, ring, instanceID, PENDING) + + assert.Equal(t, context.DeadlineExceeded, err) + ring.AssertCalled(t, "GetInstanceState", instanceID) +} + +func TestWaitInstanceState_TimeoutOnError(t *testing.T) { + t.Parallel() + + const ( + instanceID = "test" + timeoutDuration = time.Second + ) + + ctx, cancel := context.WithTimeout(context.Background(), timeoutDuration) + defer cancel() + + ring := &RingMock{} + ring.On("GetInstanceState", mock.Anything, mock.Anything).Return(PENDING, errors.New("instance not found in the ring")) + + err := WaitInstanceState(ctx, ring, instanceID, ACTIVE) + + assert.Equal(t, context.DeadlineExceeded, err) + ring.AssertCalled(t, "GetInstanceState", instanceID) +} + +func TestWaitInstanceState_ExitsAfterActualStateEqualsState(t *testing.T) { + t.Parallel() + + const ( + instanceID = "test" + timeoutDuration = time.Second + ) + + ctx, cancel := context.WithTimeout(context.Background(), timeoutDuration) + defer cancel() + + ring := &RingMock{} + ring.On("GetInstanceState", mock.Anything, mock.Anything).Return(ACTIVE, nil) + + err := WaitInstanceState(ctx, ring, instanceID, ACTIVE) + + assert.Nil(t, err) + ring.AssertNumberOfCalls(t, "GetInstanceState", 1) +} diff --git a/pkg/ruler/client_pool.go b/pkg/ruler/client_pool.go index 5338da713d1..717d154e0fc 100644 --- a/pkg/ruler/client_pool.go +++ b/pkg/ruler/client_pool.go @@ -5,13 +5,14 @@ import ( 
"github.com/go-kit/log" "github.com/grafana/dskit/grpcclient" - "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "google.golang.org/grpc" "google.golang.org/grpc/health/grpc_health_v1" + + "github.com/cortexproject/cortex/pkg/ring/client" ) // ClientsPool is the interface used to get the client from the pool for a specified address. diff --git a/pkg/ruler/lifecycle.go b/pkg/ruler/lifecycle.go index efa6ae205f9..65bb4bf7123 100644 --- a/pkg/ruler/lifecycle.go +++ b/pkg/ruler/lifecycle.go @@ -1,7 +1,7 @@ package ruler import ( - "github.com/grafana/dskit/ring" + "github.com/cortexproject/cortex/pkg/ring" ) func (r *Ruler) OnRingInstanceRegister(_ *ring.BasicLifecycler, ringDesc ring.Desc, instanceExists bool, instanceID string, instanceDesc ring.InstanceDesc) (ring.InstanceState, ring.Tokens) { diff --git a/pkg/ruler/lifecycle_test.go b/pkg/ruler/lifecycle_test.go index 358525a2d3d..83bf396a941 100644 --- a/pkg/ruler/lifecycle_test.go +++ b/pkg/ruler/lifecycle_test.go @@ -9,11 +9,11 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/kv" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/util/test" ) diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index 9c33eba2fb2..f0398c707db 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -18,7 +18,6 @@ import ( "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/grpcclient" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -31,6 +30,7 @@ import ( "golang.org/x/sync/errgroup" "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/ruler/rulespb" "github.com/cortexproject/cortex/pkg/ruler/rulestore" "github.com/cortexproject/cortex/pkg/tenant" diff --git a/pkg/ruler/ruler_ring.go b/pkg/ruler/ruler_ring.go index b22ce6b4186..443dfae922b 100644 --- a/pkg/ruler/ruler_ring.go +++ b/pkg/ruler/ruler_ring.go @@ -9,7 +9,8 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" + + "github.com/cortexproject/cortex/pkg/ring" ) const ( diff --git a/pkg/ruler/ruler_test.go b/pkg/ruler/ruler_test.go index 97a578c684e..3efc15e0831 100644 --- a/pkg/ruler/ruler_test.go +++ b/pkg/ruler/ruler_test.go @@ -17,28 +17,16 @@ import ( "time" "unsafe" - "github.com/prometheus/common/model" - "github.com/stretchr/testify/mock" - "gopkg.in/yaml.v2" - - "github.com/cortexproject/cortex/pkg/chunk/purger" - "github.com/cortexproject/cortex/pkg/querier" - "github.com/cortexproject/cortex/pkg/util/validation" - - "go.uber.org/atomic" - - "google.golang.org/grpc" - "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/gorilla/mux" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/prometheus/client_golang/prometheus" prom_testutil "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" 
"github.com/prometheus/prometheus/model/rulefmt" "github.com/prometheus/prometheus/notifier" @@ -46,16 +34,24 @@ import ( promRules "github.com/prometheus/prometheus/rules" "github.com/prometheus/prometheus/storage" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" "github.com/weaveworks/common/user" + "go.uber.org/atomic" + "google.golang.org/grpc" + "gopkg.in/yaml.v2" "github.com/cortexproject/cortex/pkg/chunk" + "github.com/cortexproject/cortex/pkg/chunk/purger" "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/cortexproject/cortex/pkg/querier" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/ruler/rulespb" "github.com/cortexproject/cortex/pkg/ruler/rulestore" "github.com/cortexproject/cortex/pkg/ruler/rulestore/objectclient" "github.com/cortexproject/cortex/pkg/tenant" "github.com/cortexproject/cortex/pkg/util" + "github.com/cortexproject/cortex/pkg/util/validation" ) func defaultRulerConfig(t testing.TB, store rulestore.RuleStore) (Config, func()) { diff --git a/pkg/storegateway/gateway.go b/pkg/storegateway/gateway.go index f085364490a..0f45e461fb2 100644 --- a/pkg/storegateway/gateway.go +++ b/pkg/storegateway/gateway.go @@ -10,7 +10,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -20,6 +19,7 @@ import ( "github.com/thanos-io/thanos/pkg/store/storepb" "github.com/weaveworks/common/logging" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/storage/bucket" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/storegateway/storegatewaypb" diff --git a/pkg/storegateway/gateway_ring.go b/pkg/storegateway/gateway_ring.go index 636cdce9c3c..c57a2f19a31 100644 --- a/pkg/storegateway/gateway_ring.go +++ b/pkg/storegateway/gateway_ring.go @@ -10,8 +10,8 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" + "github.com/cortexproject/cortex/pkg/ring" util_log "github.com/cortexproject/cortex/pkg/util/log" ) diff --git a/pkg/storegateway/gateway_ring_test.go b/pkg/storegateway/gateway_ring_test.go index b621a566adf..ef1908f653b 100644 --- a/pkg/storegateway/gateway_ring_test.go +++ b/pkg/storegateway/gateway_ring_test.go @@ -4,8 +4,9 @@ import ( "testing" "time" - "github.com/grafana/dskit/ring" "github.com/stretchr/testify/assert" + + "github.com/cortexproject/cortex/pkg/ring" ) func TestIsHealthyForStoreGatewayOperations(t *testing.T) { diff --git a/pkg/storegateway/gateway_test.go b/pkg/storegateway/gateway_test.go index e79623db539..1ef229c4ac5 100644 --- a/pkg/storegateway/gateway_test.go +++ b/pkg/storegateway/gateway_test.go @@ -19,7 +19,6 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/oklog/ulid" "github.com/pkg/errors" @@ -38,6 +37,7 @@ import ( "github.com/thanos-io/thanos/pkg/store/storepb" "google.golang.org/grpc/status" + "github.com/cortexproject/cortex/pkg/ring" "github.com/cortexproject/cortex/pkg/storage/bucket" "github.com/cortexproject/cortex/pkg/storage/bucket/filesystem" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" diff --git 
a/pkg/storegateway/sharding_strategy.go b/pkg/storegateway/sharding_strategy.go index b9a27f5dd70..64f0f19de6a 100644 --- a/pkg/storegateway/sharding_strategy.go +++ b/pkg/storegateway/sharding_strategy.go @@ -5,13 +5,13 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/grafana/dskit/ring" "github.com/oklog/ulid" "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/extprom" "github.com/thanos-io/thanos/pkg/objstore" + "github.com/cortexproject/cortex/pkg/ring" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" ) diff --git a/pkg/storegateway/sharding_strategy_test.go b/pkg/storegateway/sharding_strategy_test.go index 9ba549f6275..207c213ef5e 100644 --- a/pkg/storegateway/sharding_strategy_test.go +++ b/pkg/storegateway/sharding_strategy_test.go @@ -7,7 +7,6 @@ import ( "github.com/go-kit/log" "github.com/grafana/dskit/kv/consul" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/services" "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" @@ -17,6 +16,7 @@ import ( "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/extprom" + "github.com/cortexproject/cortex/pkg/ring" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" ) diff --git a/vendor/github.com/grafana/dskit/test/poll.go b/vendor/github.com/grafana/dskit/test/poll.go new file mode 100644 index 00000000000..05ba41235ac --- /dev/null +++ b/vendor/github.com/grafana/dskit/test/poll.go @@ -0,0 +1,26 @@ +package test + +import ( + "reflect" + "testing" + "time" +) + +// Poll repeatedly calls a function until the function returns the correct response or until poll timeout. +func Poll(t testing.TB, d time.Duration, want interface{}, have func() interface{}) { + t.Helper() + deadline := time.Now().Add(d) + for { + if time.Now().After(deadline) { + break + } + if reflect.DeepEqual(want, have()) { + return + } + time.Sleep(d / 100) + } + h := have() + if !reflect.DeepEqual(want, h) { + t.Fatalf("expected %v, got %v", want, h) + } +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 061c43b088d..d92bd133335 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -374,13 +374,10 @@ github.com/grafana/dskit/limiter github.com/grafana/dskit/middleware github.com/grafana/dskit/modules github.com/grafana/dskit/multierror -github.com/grafana/dskit/ring -github.com/grafana/dskit/ring/client -github.com/grafana/dskit/ring/shard -github.com/grafana/dskit/ring/util github.com/grafana/dskit/runtimeconfig github.com/grafana/dskit/runutil github.com/grafana/dskit/services +github.com/grafana/dskit/test # github.com/grpc-ecosystem/go-grpc-middleware v1.3.0 github.com/grpc-ecosystem/go-grpc-middleware # github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.0.0-rc.2.0.20201207153454-9f6bf00c00a7