diff --git a/CHANGELOG.md b/CHANGELOG.md index 4deb01dede2..9f44817f779 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## master / unreleased +* [CHANGE] StoreGateway/Alertmanager: Add default 5s connection timeout on client. #6603 * [FEATURE] Query Frontend: Add dynamic interval size for query splitting. This is enabled by configuring experimental flags `querier.max-shards-per-query` and/or `querier.max-fetched-data-duration-per-query`. The split interval size is dynamically increased to maintain a number of shards and total duration fetched below the configured values. #6458 * [FEATURE] Querier/Ruler: Add `query_partial_data` and `rules_partial_data` limits to allow queries/rules to be evaluated with data from a single zone, if other zones are not available. #6526 * [FEATURE] Update prometheus alertmanager version to v0.28.0 and add new integration msteamsv2, jira, and rocketchat. #6590 diff --git a/docs/blocks-storage/querier.md b/docs/blocks-storage/querier.md index 47dd9998dd9..5d09d1169c2 100644 --- a/docs/blocks-storage/querier.md +++ b/docs/blocks-storage/querier.md @@ -222,6 +222,11 @@ querier: # CLI flag: -querier.store-gateway-client.healthcheck.timeout [timeout: <duration> | default = 1s] + # The maximum amount of time to establish a connection. A value of 0 means + # using default gRPC client connect timeout 5s. + # CLI flag: -querier.store-gateway-client.connect-timeout + [connect_timeout: <duration> | default = 5s] + # If enabled, store gateway query stats will be logged using `info` log level. 
# CLI flag: -querier.store-gateway-query-stats-enabled [store_gateway_query_stats: <boolean> | default = true] diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 34f5cda05cf..5a806a33f14 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -503,6 +503,11 @@ alertmanager_client: # CLI flag: -alertmanager.alertmanager-client.grpc-max-send-msg-size [max_send_msg_size: <int> | default = 4194304] + # The maximum amount of time to establish a connection. A value of 0 means + # using default gRPC client connect timeout 5s. + # CLI flag: -alertmanager.alertmanager-client.connect-timeout + [connect_timeout: <duration> | default = 5s] + # The interval between persisting the current alertmanager state (notification # log and silences) to object storage. This is only used when sharding is # enabled. This state is read when all replicas for a shard can not be @@ -4100,6 +4105,11 @@ store_gateway_client: # CLI flag: -querier.store-gateway-client.healthcheck.timeout [timeout: <duration> | default = 1s] + # The maximum amount of time to establish a connection. A value of 0 means + # using default gRPC client connect timeout 5s. + # CLI flag: -querier.store-gateway-client.connect-timeout + [connect_timeout: <duration> | default = 5s] + # If enabled, store gateway query stats will be logged using `info` log level. 
# CLI flag: -querier.store-gateway-query-stats-enabled [store_gateway_query_stats: <boolean> | default = true] diff --git a/pkg/alertmanager/alertmanager_client.go b/pkg/alertmanager/alertmanager_client.go index 041449269a4..5e144e579a2 100644 --- a/pkg/alertmanager/alertmanager_client.go +++ b/pkg/alertmanager/alertmanager_client.go @@ -40,6 +40,7 @@ type ClientConfig struct { GRPCCompression string `yaml:"grpc_compression"` MaxRecvMsgSize int `yaml:"max_recv_msg_size"` MaxSendMsgSize int `yaml:"max_send_msg_size"` + ConnectTimeout time.Duration `yaml:"connect_timeout"` } // RegisterFlagsWithPrefix registers flags with prefix. @@ -50,6 +51,7 @@ func (cfg *ClientConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) cfg.TLS.RegisterFlagsWithPrefix(prefix, f) f.IntVar(&cfg.MaxRecvMsgSize, prefix+".grpc-max-recv-msg-size", 16*1024*1024, "gRPC client max receive message size (bytes).") f.IntVar(&cfg.MaxSendMsgSize, prefix+".grpc-max-send-msg-size", 4*1024*1024, "gRPC client max send message size (bytes).") + f.DurationVar(&cfg.ConnectTimeout, prefix+".connect-timeout", 5*time.Second, "The maximum amount of time to establish a connection. 
A value of 0 means using default gRPC client connect timeout 5s.") } type alertmanagerClientsPool struct { @@ -67,6 +69,7 @@ func newAlertmanagerClientsPool(discovery client.PoolServiceDiscovery, amClientC BackoffOnRatelimits: false, TLSEnabled: amClientCfg.TLSEnabled, TLS: amClientCfg.TLS, + ConnectTimeout: amClientCfg.ConnectTimeout, } requestDuration := promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ diff --git a/pkg/querier/store_gateway_client.go b/pkg/querier/store_gateway_client.go index 088619599b3..140e3b39078 100644 --- a/pkg/querier/store_gateway_client.go +++ b/pkg/querier/store_gateway_client.go @@ -79,6 +79,7 @@ func newStoreGatewayClientPool(discovery client.PoolServiceDiscovery, clientConf BackoffOnRatelimits: false, TLSEnabled: clientConfig.TLSEnabled, TLS: clientConfig.TLS, + ConnectTimeout: clientConfig.ConnectTimeout, }, HealthCheckConfig: clientConfig.HealthCheckConfig, } @@ -103,11 +104,13 @@ type ClientConfig struct { TLS tls.ClientConfig `yaml:",inline"` GRPCCompression string `yaml:"grpc_compression"` HealthCheckConfig grpcclient.HealthCheckConfig `yaml:"healthcheck_config" doc:"description=EXPERIMENTAL: If enabled, gRPC clients perform health checks for each target and fail the request if the target is marked as unhealthy."` + ConnectTimeout time.Duration `yaml:"connect_timeout"` } func (cfg *ClientConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.BoolVar(&cfg.TLSEnabled, prefix+".tls-enabled", cfg.TLSEnabled, "Enable TLS for gRPC client connecting to store-gateway.") f.StringVar(&cfg.GRPCCompression, prefix+".grpc-compression", "", "Use compression when sending messages. Supported values are: 'gzip', 'snappy' and '' (disable compression)") + f.DurationVar(&cfg.ConnectTimeout, prefix+".connect-timeout", 5*time.Second, "The maximum amount of time to establish a connection. 
A value of 0 means using default gRPC client connect timeout 5s.") cfg.TLS.RegisterFlagsWithPrefix(prefix, f) cfg.HealthCheckConfig.RegisterFlagsWithPrefix(prefix, f) }