From 930243c646c30afe8fd44d76884bac5cba9fb634 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Tue, 16 Jan 2024 21:45:23 -0800 Subject: [PATCH 1/5] otlpgrpc: turn on round_robin loadbalancing policy and use kuberesolver for kubernetes service endpoint Signed-off-by: Ben Ye --- pkg/tracing/tracing.go | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/pkg/tracing/tracing.go b/pkg/tracing/tracing.go index 8c92ac2fba5..6a30bde82de 100644 --- a/pkg/tracing/tracing.go +++ b/pkg/tracing/tracing.go @@ -8,6 +8,7 @@ import ( "github.com/go-kit/log/level" "github.com/pkg/errors" + "github.com/sercand/kuberesolver" "github.com/weaveworks/common/tracing" "go.opentelemetry.io/contrib/propagators/aws/xray" "google.golang.org/grpc/credentials" @@ -91,16 +92,23 @@ func SetupTracing(ctx context.Context, name string, c Config) (func(context.Cont level.Warn(util_log.Logger).Log("msg", "DEPRECATED: otel.otlp and otel.oltp both set, using otel.otlp because otel.oltp is deprecated") } - options := []otlptracegrpc.Option{ - otlptracegrpc.WithEndpoint(c.Otel.OtlpEndpoint), - } - + endpoint := c.Otel.OtlpEndpoint if (c.Otel.OtlpEndpoint == "") && (len(c.Otel.OltpEndpoint) > 0) { level.Warn(util_log.Logger).Log("msg", "DEPRECATED: otel.oltp is deprecated use otel.otlp") - options = []otlptracegrpc.Option{ - otlptracegrpc.WithEndpoint(c.Otel.OltpEndpoint), - } + endpoint = c.Otel.OltpEndpoint } + options := []otlptracegrpc.Option{ + otlptracegrpc.WithEndpoint(endpoint), + } + // Following https://github.com/sercand/kuberesolver/blob/master/builder.go#L96. + if strings.HasPrefix(endpoint, "kubernetes://") { + // Registers the kuberesolver which resolves endpoint with prefix kubernetes:// + // as kubernetes service endpoint addresses. + kuberesolver.RegisterInCluster() + } + + // Always use `round_robin` gRPC client side load balancing. 
+ options = append(options, otlptracegrpc.WithServiceConfig(`{"loadBalancingPolicy": "round_robin"}`)) if c.Otel.TLSEnabled { tlsConfig, err := c.Otel.TLS.GetTLSConfig() From c1913e2d5c91b8caa32681e151d69bcc8a1deb1f Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Tue, 16 Jan 2024 21:58:17 -0800 Subject: [PATCH 2/5] update changelog Signed-off-by: Ben Ye --- CHANGELOG.md | 1 + go.mod | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 758f0f89b52..c47447bd8d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * [CHANGE] Index Cache: Multi level cache backfilling operation becomes async. Added `-blocks-storage.bucket-store.index-cache.multilevel.max-async-concurrency` and `-blocks-storage.bucket-store.index-cache.multilevel.max-async-buffer-size` configs and metric `cortex_store_multilevel_index_cache_backfill_dropped_items_total` for number of dropped items. #5661 * [FEATURE] Ingester: Add per-tenant new metric `cortex_ingester_tsdb_data_replay_duration_seconds`. #5477 * [FEATURE] Query Frontend/Scheduler: Add query priority support. #5605 +* [FEATURE] Tracing: Use `round_robin` gRPC client side LB policy for sending OTLP traces. Also enables `kuberesolver` to resolve endpoints address with `kubernetes://` prefix as Kubernetes service. #5731 * [ENHANCEMENT] Store Gateway: Added `-store-gateway.enabled-tenants` and `-store-gateway.disabled-tenants` to explicitly enable or disable store-gateway for specific tenants. #5638 * [ENHANCEMENT] Compactor: Add new compactor metric `cortex_compactor_start_duration_seconds`. #5683 * [ENHANCEMENT] Upgraded Docker base images to `alpine:3.18`. 
#5684 diff --git a/go.mod b/go.mod index 518e7e7ee26..050756a4761 100644 --- a/go.mod +++ b/go.mod @@ -80,6 +80,7 @@ require ( github.com/VictoriaMetrics/fastcache v1.12.1 github.com/cespare/xxhash/v2 v2.2.0 github.com/google/go-cmp v0.6.0 + github.com/sercand/kuberesolver v2.4.0+incompatible golang.org/x/exp v0.0.0-20231206192017-f3f8817b8deb google.golang.org/protobuf v1.31.0 ) @@ -196,7 +197,6 @@ require ( github.com/rs/cors v1.9.0 // indirect github.com/rs/xid v1.5.0 // indirect github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect - github.com/sercand/kuberesolver v2.4.0+incompatible // indirect github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c // indirect github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect github.com/sirupsen/logrus v1.9.3 // indirect From 55e7734c66c0a43e99a553e8bf829e681703656e Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 17 Jan 2024 15:51:59 -0800 Subject: [PATCH 3/5] make round robin policy configurable Signed-off-by: Ben Ye --- pkg/tracing/tracing.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pkg/tracing/tracing.go b/pkg/tracing/tracing.go index 6a30bde82de..9f87225a8d0 100644 --- a/pkg/tracing/tracing.go +++ b/pkg/tracing/tracing.go @@ -42,6 +42,7 @@ type Otel struct { OtlpEndpoint string `yaml:"otlp_endpoint" json:"otlp_endpoint"` ExporterType string `yaml:"exporter_type" json:"exporter_type"` SampleRatio float64 `yaml:"sample_ratio" json:"sample_ratio"` + RoundRobin bool `yaml:"round_robin" json:"round_robin"` TLSEnabled bool `yaml:"tls_enabled"` TLS tls.ClientConfig `yaml:"tls"` ExtraDetectors []resource.Detector `yaml:"-"` @@ -56,6 +57,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { f.StringVar(&c.Otel.OtlpEndpoint, p+".otel.otlp-endpoint", "", "otl collector endpoint that the driver will use to send spans.") f.StringVar(&c.Otel.ExporterType, p+".otel.exporter-type", "", "enhance/modify traces/propagators for specific exporter. 
If empty, OTEL defaults will apply. Supported values are: `awsxray.`") f.BoolVar(&c.Otel.TLSEnabled, p+".otel.tls-enabled", c.Otel.TLSEnabled, "Enable TLS in the GRPC client. This flag needs to be enabled when any other TLS flag is set. If set to false, insecure connection to gRPC server will be used.") + f.BoolVar(&c.Otel.RoundRobin, p+".otel.round-robin", false, "If enabled, use round_robin gRPC load balancing policy. By default, use pick_first policy. For more details, please refer to https://github.com/grpc/grpc/blob/master/doc/load-balancing.md#load-balancing-policies.") c.Otel.TLS.RegisterFlagsWithPrefix(p+".otel.tls", f) } @@ -107,8 +109,9 @@ func SetupTracing(ctx context.Context, name string, c Config) (func(context.Cont kuberesolver.RegisterInCluster() } - // Always use `round_robin` gRPC client side load balancing. - options = append(options, otlptracegrpc.WithServiceConfig(`{"loadBalancingPolicy": "round_robin"}`)) + if c.Otel.RoundRobin { + options = append(options, otlptracegrpc.WithServiceConfig(`{"loadBalancingPolicy": "round_robin"}`)) + } if c.Otel.TLSEnabled { tlsConfig, err := c.Otel.TLS.GetTLSConfig() From 96905d96154b59598c135922e675015622ef13a9 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 17 Jan 2024 23:54:18 +0000 Subject: [PATCH 4/5] update doc Signed-off-by: Ben Ye --- docs/configuration/config-file-reference.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index c87286ab1ed..c30731e5d70 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -5024,6 +5024,12 @@ otel: # CLI flag: -tracing.otel.sample-ratio [sample_ratio: | default = 0.001] + # If enabled, use round_robin gRPC load balancing policy. By default, use + # pick_first policy. For more details, please refer to + # https://github.com/grpc/grpc/blob/master/doc/load-balancing.md#load-balancing-policies. 
+ # CLI flag: -tracing.otel.round-robin + [round_robin: <boolean> | default = false] + # Enable TLS in the GRPC client. This flag needs to be enabled when any other # TLS flag is set. If set to false, insecure connection to gRPC server will be # used. From 688b39bb0a114d98f1f2d0e323eb9a8cd98f873f Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 17 Jan 2024 23:56:30 +0000 Subject: [PATCH 5/5] update changelog Signed-off-by: Ben Ye --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c47447bd8d2..e2717ca2e1c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,8 @@ * [CHANGE] Index Cache: Multi level cache backfilling operation becomes async. Added `-blocks-storage.bucket-store.index-cache.multilevel.max-async-concurrency` and `-blocks-storage.bucket-store.index-cache.multilevel.max-async-buffer-size` configs and metric `cortex_store_multilevel_index_cache_backfill_dropped_items_total` for number of dropped items. #5661 * [FEATURE] Ingester: Add per-tenant new metric `cortex_ingester_tsdb_data_replay_duration_seconds`. #5477 * [FEATURE] Query Frontend/Scheduler: Add query priority support. #5605 -* [FEATURE] Tracing: Use `round_robin` gRPC client side LB policy for sending OTLP traces. Also enables `kuberesolver` to resolve endpoints address with `kubernetes://` prefix as Kubernetes service. #5731 +* [FEATURE] Tracing: Add `kuberesolver` to resolve endpoint addresses with the `kubernetes://` prefix as Kubernetes services. #5731 +* [FEATURE] Tracing: Add `-tracing.otel.round-robin` flag to use the `round_robin` gRPC client-side LB policy for sending OTLP traces. #5731 * [ENHANCEMENT] Store Gateway: Added `-store-gateway.enabled-tenants` and `-store-gateway.disabled-tenants` to explicitly enable or disable store-gateway for specific tenants. #5638 * [ENHANCEMENT] Compactor: Add new compactor metric `cortex_compactor_start_duration_seconds`. #5683 * [ENHANCEMENT] Upgraded Docker base images to `alpine:3.18`. #5684