diff --git a/CHANGELOG.md b/CHANGELOG.md index 0cf84c8f922..bfc4d732d24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,6 +48,7 @@ * `cortex_<service>_blocks_sync_seconds` => `cortex_bucket_stores_blocks_sync_seconds{component="<service>"}` * `cortex_<service>_blocks_last_successful_sync_timestamp_seconds` => `cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="<service>"}` * [CHANGE] Available command-line flags are printed to stdout, and only when requested via `-help`. Using invalid flag no longer causes printing of all available flags. #2691 +* [CHANGE] Experimental Memberlist ring: randomize gossip node names to avoid conflicts when running multiple clients on the same host, or reusing host names (e.g. pods in a StatefulSet). Node name randomization can be disabled by using `-memberlist.randomize-node-name=false`. #2715 * [FEATURE] TLS config options added for GRPC clients in Querier (Query-frontend client & Ingester client), Ruler, Store Gateway, as well as HTTP client in Config store client. #2502 * [FEATURE] The flag `-frontend.max-cache-freshness` is now supported within the limits overrides, to specify per-tenant max cache freshness values. The corresponding YAML config parameter has been changed from `results_cache.max_freshness` to `limits_config.max_cache_freshness`. The legacy YAML config parameter (`results_cache.max_freshness`) will continue to be supported till Cortex release `v1.4.0`. #2609 * [FEATURE] Experimental gRPC Store: Added support to 3rd parties index and chunk stores using gRPC client/server plugin mechanism. #2220 diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 7a523cab1a7..0f6d0d1a2c3 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -2267,6 +2267,10 @@ The `memberlist_config` configures the Gossip memberlist. # CLI flag: -memberlist.nodename [node_name: <string> | default = ""] +# Add random suffix to the node name. 
+# CLI flag: -memberlist.randomize-node-name +[randomize_node_name: <boolean> | default = true] + # The timeout for establishing a connection with a remote node, and for # read/write operations. Uses memberlist LAN defaults if 0. # CLI flag: -memberlist.stream-timeout diff --git a/docs/configuration/single-process-config-blocks-gossip-1.yaml b/docs/configuration/single-process-config-blocks-gossip-1.yaml index 6b4ae86b224..3b13746d569 100644 --- a/docs/configuration/single-process-config-blocks-gossip-1.yaml +++ b/docs/configuration/single-process-config-blocks-gossip-1.yaml @@ -59,8 +59,6 @@ ingester: replication_factor: 1 memberlist: - # defaults to hostname - node_name: "Ingester 1" bind_port: 7946 join_members: - localhost:7947 diff --git a/docs/configuration/single-process-config-blocks-gossip-2.yaml b/docs/configuration/single-process-config-blocks-gossip-2.yaml index 8c74f3f9d20..b3867c49169 100644 --- a/docs/configuration/single-process-config-blocks-gossip-2.yaml +++ b/docs/configuration/single-process-config-blocks-gossip-2.yaml @@ -58,8 +58,6 @@ ingester: replication_factor: 1 memberlist: - # defaults to hostname - node_name: "Ingester 2" bind_port: 7947 join_members: - localhost:7946 diff --git a/pkg/ring/kv/memberlist/memberlist_client.go b/pkg/ring/kv/memberlist/memberlist_client.go index 66e0f9b2ce7..c135baf4da4 100644 --- a/pkg/ring/kv/memberlist/memberlist_client.go +++ b/pkg/ring/kv/memberlist/memberlist_client.go @@ -3,6 +3,7 @@ package memberlist import ( "bytes" "context" + "crypto/rand" "encoding/binary" "errors" "flag" @@ -81,6 +82,7 @@ func (c *Client) WatchPrefix(ctx context.Context, prefix string, f func(string, type KVConfig struct { // Memberlist options. 
NodeName          string        `yaml:"node_name"`
+	RandomizeNodeName bool          `yaml:"randomize_node_name"`
 	StreamTimeout     time.Duration `yaml:"stream_timeout"`
 	RetransmitMult    int           `yaml:"retransmit_factor"`
 	PushPullInterval  time.Duration `yaml:"pull_push_interval"`
@@ -113,6 +115,7 @@ type KVConfig struct {
 func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet, prefix string) {
 	// "Defaults to hostname" -- memberlist sets it to hostname by default.
 	f.StringVar(&cfg.NodeName, prefix+"memberlist.nodename", "", "Name of the node in memberlist cluster. Defaults to hostname.") // memberlist.DefaultLANConfig will put hostname here.
+	f.BoolVar(&cfg.RandomizeNodeName, prefix+"memberlist.randomize-node-name", true, "Add random suffix to the node name.")
 	f.DurationVar(&cfg.StreamTimeout, prefix+"memberlist.stream-timeout", 0, "The timeout for establishing a connection with a remote node, and for read/write operations. Uses memberlist LAN defaults if 0.")
 	f.IntVar(&cfg.RetransmitMult, prefix+"memberlist.retransmit-factor", 0, "Multiplication factor used when sending out messages (factor * log(N+1)).")
 	f.Var(&cfg.JoinMembers, prefix+"memberlist.join", "Other cluster members to join. Can be specified multiple times. Memberlist store is EXPERIMENTAL.")
@@ -128,6 +131,23 @@ func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet, prefix string) {
 	cfg.TCPTransport.RegisterFlags(f, prefix)
 }
 
+// generateRandomSuffix returns 4 random bytes from crypto/rand encoded as 8
+// lower-case hex characters, for appending to the memberlist node name.
+//
+// If the random source fails, the error is logged and a suffix derived from
+// the current time is returned instead, so that affected nodes do not all end
+// up sharing one constant suffix.
+func generateRandomSuffix() string {
+	var suffix [4]byte
+	if _, err := rand.Read(suffix[:]); err != nil {
+		level.Error(util.Logger).Log("msg", "failed to generate random suffix", "err", err)
+		// Low 32 bits of the nanosecond clock: same 8-hex-character shape as the normal path.
+		return fmt.Sprintf("%08x", uint32(time.Now().UnixNano()))
+	}
+	// %x on a byte slice already emits two hex characters per byte.
+	return fmt.Sprintf("%x", suffix[:])
+}
+
 // KV implements Key-Value store on top of memberlist library. KV store has API similar to kv.Client,
 // except methods also need explicit codec for each operation.
type KV struct {
@@ -240,6 +253,10 @@ func NewKV(cfg KVConfig) (*KV, error) {
 	if cfg.NodeName != "" {
 		mlCfg.Name = cfg.NodeName
 	}
+	if cfg.RandomizeNodeName {
+		mlCfg.Name = mlCfg.Name + "-" + generateRandomSuffix()
+		level.Info(util.Logger).Log("msg", "Using memberlist cluster node name", "name", mlCfg.Name)
+	}
 
 	mlCfg.LogOutput = newMemberlistLoggerAdapter(util.Logger, false)
 	mlCfg.Transport = tr
diff --git a/pkg/ring/kv/memberlist/memberlist_client_test.go b/pkg/ring/kv/memberlist/memberlist_client_test.go
index f1b4d03135b..25c80d1131a 100644
--- a/pkg/ring/kv/memberlist/memberlist_client_test.go
+++ b/pkg/ring/kv/memberlist/memberlist_client_test.go
@@ -807,3 +807,15 @@ func TestMultipleCodecs(t *testing.T) {
 	require.NotNil(t, val)
 	require.Equal(t, 5, val.(distributedCounter)["test"])
 }
+
+// TestGenerateRandomSuffix checks that three consecutive calls to
+// generateRandomSuffix yield pairwise different suffixes.
+func TestGenerateRandomSuffix(t *testing.T) {
+	h1 := generateRandomSuffix()
+	h2 := generateRandomSuffix()
+	h3 := generateRandomSuffix()
+
+	require.NotEqual(t, h1, h2)
+	require.NotEqual(t, h2, h3)
+	require.NotEqual(t, h1, h3)
+}