From 3a64ce3b735ab476eb01d42b016b7b5ae0e59ab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Thu, 11 Jun 2020 11:48:51 +0200 Subject: [PATCH 1/5] Randomize gossip node name by adding random suffix. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gossip node name is mostly invisible to the user, but we log it, just in case it's logged by memberlist too. This helps with running multiple memberlist instances locally, or when using stateful sets. Signed-off-by: Peter Štibraný --- ...single-process-config-blocks-gossip-1.yaml | 2 -- ...single-process-config-blocks-gossip-2.yaml | 2 -- pkg/ring/kv/memberlist/memberlist_client.go | 20 ++++++++++++++----- .../kv/memberlist/memberlist_client_test.go | 9 +++++++++ 4 files changed, 24 insertions(+), 9 deletions(-) diff --git a/docs/configuration/single-process-config-blocks-gossip-1.yaml b/docs/configuration/single-process-config-blocks-gossip-1.yaml index 6b4ae86b224..3b13746d569 100644 --- a/docs/configuration/single-process-config-blocks-gossip-1.yaml +++ b/docs/configuration/single-process-config-blocks-gossip-1.yaml @@ -59,8 +59,6 @@ ingester: replication_factor: 1 memberlist: - # defaults to hostname - node_name: "Ingester 1" bind_port: 7946 join_members: - localhost:7947 diff --git a/docs/configuration/single-process-config-blocks-gossip-2.yaml b/docs/configuration/single-process-config-blocks-gossip-2.yaml index 8c74f3f9d20..b3867c49169 100644 --- a/docs/configuration/single-process-config-blocks-gossip-2.yaml +++ b/docs/configuration/single-process-config-blocks-gossip-2.yaml @@ -58,8 +58,6 @@ ingester: replication_factor: 1 memberlist: - # defaults to hostname - node_name: "Ingester 2" bind_port: 7947 join_members: - localhost:7946 diff --git a/pkg/ring/kv/memberlist/memberlist_client.go b/pkg/ring/kv/memberlist/memberlist_client.go index 66e0f9b2ce7..e016db87c89 100644 --- a/pkg/ring/kv/memberlist/memberlist_client.go +++ b/pkg/ring/kv/memberlist/memberlist_client.go @@ -3,11 +3,13 @@ package memberlist import ( "bytes" "context" + "crypto/rand" "encoding/binary" "errors" "flag" "fmt" "math" + "os" "strings" "sync" "time" @@ -111,8 +113,7 @@ type KVConfig struct { // RegisterFlags registers flags. func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet, prefix string) { - // "Defaults to hostname" -- memberlist sets it to hostname by default. - f.StringVar(&cfg.NodeName, prefix+"memberlist.nodename", "", "Name of the node in memberlist cluster. Defaults to hostname.") // memberlist.DefaultLANConfig will put hostname here. + f.StringVar(&cfg.NodeName, prefix+"memberlist.nodename", getHostnameWithRandomSuffix(), "Name of the node in memberlist cluster. Defaults to hostname with random suffix.") f.DurationVar(&cfg.StreamTimeout, prefix+"memberlist.stream-timeout", 0, "The timeout for establishing a connection with a remote node, and for read/write operations. Uses memberlist LAN defaults if 0.") f.IntVar(&cfg.RetransmitMult, prefix+"memberlist.retransmit-factor", 0, "Multiplication factor used when sending out messages (factor * log(N+1)).") f.Var(&cfg.JoinMembers, prefix+"memberlist.join", "Other cluster members to join. Can be specified multiple times. Memberlist store is EXPERIMENTAL.") @@ -128,6 +129,16 @@ func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet, prefix string) { cfg.TCPTransport.RegisterFlags(f, prefix) } +func getHostnameWithRandomSuffix() string { + hostname, _ := os.Hostname() + suffix := make([]byte, 4) + _, err := rand.Read(suffix) + if err == nil { + hostname = fmt.Sprintf("%s-%2x", hostname, suffix) + } + return hostname +} + // KV implements Key-Value store on top of memberlist library. KV store has API similar to kv.Client, // except methods also need explicit codec for each operation. type KV struct { @@ -237,9 +248,8 @@ func NewKV(cfg KVConfig) (*KV, error) { if cfg.DeadNodeReclaimTime > 0 { mlCfg.DeadNodeReclaimTime = cfg.DeadNodeReclaimTime } - if cfg.NodeName != "" { - mlCfg.Name = cfg.NodeName - } + mlCfg.Name = cfg.NodeName + level.Info(util.Logger).Log("msg", "Using memberlist cluster node name", "name", mlCfg.Name) mlCfg.LogOutput = newMemberlistLoggerAdapter(util.Logger, false) mlCfg.Transport = tr diff --git a/pkg/ring/kv/memberlist/memberlist_client_test.go b/pkg/ring/kv/memberlist/memberlist_client_test.go index f1b4d03135b..486e60a516c 100644 --- a/pkg/ring/kv/memberlist/memberlist_client_test.go +++ b/pkg/ring/kv/memberlist/memberlist_client_test.go @@ -807,3 +807,12 @@ func TestMultipleCodecs(t *testing.T) { require.NotNil(t, val) require.Equal(t, 5, val.(distributedCounter)["test"]) } + +func TestGetHostnameWithRandomSuffix(t *testing.T) { + h1 := getHostnameWithRandomSuffix() + h2 := getHostnameWithRandomSuffix() + h3 := getHostnameWithRandomSuffix() + + require.NotEqual(t, h1, h2) + require.NotEqual(t, h2, h3) +} From dbb61566312a9be3600bb9631bfa13cc233425e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Thu, 11 Jun 2020 12:00:52 +0200 Subject: [PATCH 2/5] Added CHANGELOG.md entry. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index cc91c4c20c2..62bbf4f06ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -46,6 +46,7 @@ * `cortex__bucket_store_cached_postings_compressed_size_bytes_total` => `cortex_bucket_store_cached_postings_compressed_size_bytes_total{component=""}` * `cortex__blocks_sync_seconds` => `cortex_bucket_stores_blocks_sync_seconds{component=""}` * `cortex__blocks_last_successful_sync_timestamp_seconds` => `cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component=""}` +* [CHANGE] Experimental Memberlist ring: randomize gossip node names to avoid conflicts when running multiple clients on the same host, or reusing host names (eg. pods in statefulset). #2715 * [FEATURE] TLS config options added for GRPC clients in Querier (Query-frontend client & Ingester client), Ruler, Store Gateway, as well as HTTP client in Config store client. #2502 * [FEATURE] The flag `-frontend.max-cache-freshness` is now supported within the limits overrides, to specify per-tenant max cache freshness values. The corresponding YAML config parameter has been changed from `results_cache.max_freshness` to `limits_config.max_cache_freshness`. The legacy YAML config parameter (`results_cache.max_freshness`) will continue to be supported till Cortex release `v1.4.0`. #2609 * [FEATURE] Experimental gRPC Store: Added support to 3rd parties index and chunk stores using gRPC client/server plugin mechanism. #2220 From 6e88ca05577868b56e330f37a9bcf5d202eb2cbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Thu, 11 Jun 2020 12:49:01 +0200 Subject: [PATCH 3/5] Add new flag for randomizing node name. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That plays nicely with our documentation tool. Signed-off-by: Peter Štibraný --- pkg/ring/kv/memberlist/memberlist_client.go | 25 ++++++++++++------- .../kv/memberlist/memberlist_client_test.go | 8 +++--- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/pkg/ring/kv/memberlist/memberlist_client.go b/pkg/ring/kv/memberlist/memberlist_client.go index e016db87c89..c135baf4da4 100644 --- a/pkg/ring/kv/memberlist/memberlist_client.go +++ b/pkg/ring/kv/memberlist/memberlist_client.go @@ -9,7 +9,6 @@ import ( "flag" "fmt" "math" - "os" "strings" "sync" "time" @@ -83,6 +82,7 @@ func (c *Client) WatchPrefix(ctx context.Context, prefix string, f func(string, type KVConfig struct { // Memberlist options. NodeName string `yaml:"node_name"` + RandomizeNodeName bool `yaml:"randomize_node_name"` StreamTimeout time.Duration `yaml:"stream_timeout"` RetransmitMult int `yaml:"retransmit_factor"` PushPullInterval time.Duration `yaml:"pull_push_interval"` @@ -113,7 +113,9 @@ type KVConfig struct { // RegisterFlags registers flags. func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet, prefix string) { - f.StringVar(&cfg.NodeName, prefix+"memberlist.nodename", getHostnameWithRandomSuffix(), "Name of the node in memberlist cluster. Defaults to hostname with random suffix.") + // "Defaults to hostname" -- memberlist sets it to hostname by default. + f.StringVar(&cfg.NodeName, prefix+"memberlist.nodename", "", "Name of the node in memberlist cluster. Defaults to hostname.") // memberlist.DefaultLANConfig will put hostname here. + f.BoolVar(&cfg.RandomizeNodeName, prefix+"memberlist.randomize-node-name", true, "Add random suffix to the node name.") f.DurationVar(&cfg.StreamTimeout, prefix+"memberlist.stream-timeout", 0, "The timeout for establishing a connection with a remote node, and for read/write operations. Uses memberlist LAN defaults if 0.") f.IntVar(&cfg.RetransmitMult, prefix+"memberlist.retransmit-factor", 0, "Multiplication factor used when sending out messages (factor * log(N+1)).") f.Var(&cfg.JoinMembers, prefix+"memberlist.join", "Other cluster members to join. Can be specified multiple times. Memberlist store is EXPERIMENTAL.") @@ -129,14 +131,14 @@ func (cfg *KVConfig) RegisterFlags(f *flag.FlagSet, prefix string) { cfg.TCPTransport.RegisterFlags(f, prefix) } -func getHostnameWithRandomSuffix() string { - hostname, _ := os.Hostname() +func generateRandomSuffix() string { suffix := make([]byte, 4) _, err := rand.Read(suffix) - if err == nil { - hostname = fmt.Sprintf("%s-%2x", hostname, suffix) + if err != nil { + level.Error(util.Logger).Log("msg", "failed to generate random suffix", "err", err) + return "error" } - return hostname + return fmt.Sprintf("%2x", suffix) } // KV implements Key-Value store on top of memberlist library. KV store has API similar to kv.Client, @@ -248,8 +250,13 @@ func NewKV(cfg KVConfig) (*KV, error) { if cfg.DeadNodeReclaimTime > 0 { mlCfg.DeadNodeReclaimTime = cfg.DeadNodeReclaimTime } - mlCfg.Name = cfg.NodeName - level.Info(util.Logger).Log("msg", "Using memberlist cluster node name", "name", mlCfg.Name) + if cfg.NodeName != "" { + mlCfg.Name = cfg.NodeName + } + if cfg.RandomizeNodeName { + mlCfg.Name = mlCfg.Name + "-" + generateRandomSuffix() + level.Info(util.Logger).Log("msg", "Using memberlist cluster node name", "name", mlCfg.Name) + } mlCfg.LogOutput = newMemberlistLoggerAdapter(util.Logger, false) mlCfg.Transport = tr diff --git a/pkg/ring/kv/memberlist/memberlist_client_test.go b/pkg/ring/kv/memberlist/memberlist_client_test.go index 486e60a516c..25c80d1131a 100644 --- a/pkg/ring/kv/memberlist/memberlist_client_test.go +++ b/pkg/ring/kv/memberlist/memberlist_client_test.go @@ -808,10 +808,10 @@ func TestMultipleCodecs(t *testing.T) { require.Equal(t, 5, val.(distributedCounter)["test"]) } -func TestGetHostnameWithRandomSuffix(t *testing.T) { - h1 := getHostnameWithRandomSuffix() - h2 := getHostnameWithRandomSuffix() - h3 := getHostnameWithRandomSuffix() +func TestGenerateRandomSuffix(t *testing.T) { + h1 := generateRandomSuffix() + h2 := generateRandomSuffix() + h3 := generateRandomSuffix() require.NotEqual(t, h1, h2) require.NotEqual(t, h2, h3) From f99c795ef6e6de77efb9e30157987a1a52771061 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Thu, 11 Jun 2020 13:13:28 +0200 Subject: [PATCH 4/5] Doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- docs/configuration/config-file-reference.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 7a523cab1a7..0f6d0d1a2c3 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -2267,6 +2267,10 @@ The `memberlist_config` configures the Gossip memberlist. # CLI flag: -memberlist.nodename [node_name: | default = ""] +# Add random suffix to the node name. +# CLI flag: -memberlist.randomize-node-name +[randomize_node_name: | default = true] + # The timeout for establishing a connection with a remote node, and for # read/write operations. Uses memberlist LAN defaults if 0. # CLI flag: -memberlist.stream-timeout From a270501f570148e5ef627c288610f5cf39afd85a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20S=CC=8Ctibrany=CC=81?= Date: Mon, 15 Jun 2020 08:14:28 +0200 Subject: [PATCH 5/5] Mention how to disable node name randomization. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Štibraný --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b12af947ec5..bfc4d732d24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -48,7 +48,7 @@ * `cortex__blocks_sync_seconds` => `cortex_bucket_stores_blocks_sync_seconds{component=""}` * `cortex__blocks_last_successful_sync_timestamp_seconds` => `cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component=""}` * [CHANGE] Available command-line flags are printed to stdout, and only when requested via `-help`. Using invalid flag no longer causes printing of all available flags. #2691 -* [CHANGE] Experimental Memberlist ring: randomize gossip node names to avoid conflicts when running multiple clients on the same host, or reusing host names (eg. pods in statefulset). #2715 +* [CHANGE] Experimental Memberlist ring: randomize gossip node names to avoid conflicts when running multiple clients on the same host, or reusing host names (eg. pods in statefulset). Node name randomization can be disabled by using `-memberlist.randomize-node-name=false`. #2715 * [FEATURE] TLS config options added for GRPC clients in Querier (Query-frontend client & Ingester client), Ruler, Store Gateway, as well as HTTP client in Config store client. #2502 * [FEATURE] The flag `-frontend.max-cache-freshness` is now supported within the limits overrides, to specify per-tenant max cache freshness values. The corresponding YAML config parameter has been changed from `results_cache.max_freshness` to `limits_config.max_cache_freshness`. The legacy YAML config parameter (`results_cache.max_freshness`) will continue to be supported till Cortex release `v1.4.0`. #2609 * [FEATURE] Experimental gRPC Store: Added support to 3rd parties index and chunk stores using gRPC client/server plugin mechanism. #2220