cortexproject
diff --git a/‎CHANGELOG.md‎
Lines changed: 40 additions & 0 deletions b/‎CHANGELOG.md‎
Lines changed: 40 additions & 0 deletions
diff --git a/‎integration/e2e/composite_service.go‎
Lines changed: 35 additions & 0 deletions b/‎integration/e2e/composite_service.go‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎integration/e2e/service.go‎
Lines changed: 58 additions & 27 deletions b/‎integration/e2e/service.go‎
Lines changed: 58 additions & 27 deletions
@@ -5,6 +5,46 @@
 * [CHANGE] Query Frontend now uses Round Robin to choose a tenant queue to service next. #2553
 * [CHANGE] `-promql.lookback-delta` is now deprecated and has been replaced by `-querier.lookback-delta` along with `lookback_delta` entry under `querier` in the config file. `-promql.lookback-delta` will be removed in v1.4.0. #2604
 * [CHANGE] Experimental TSDB: removed `-experimental.tsdb.bucket-store.binary-index-header-enabled` flag. Now the binary index-header is always enabled.
+* [CHANGE] Experimental TSDB: Renamed index-cache metrics to use original metric names from Thanos, as Cortex is not aggregating them in any way: #2627
+  * `cortex_<service>_blocks_index_cache_items_evicted_total` => `thanos_store_index_cache_items_evicted_total{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_items_added_total` => `thanos_store_index_cache_items_added_total{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_requests_total` => `thanos_store_index_cache_requests_total{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_items_overflowed_total` => `thanos_store_index_cache_items_overflowed_total{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_hits_total` => `thanos_store_index_cache_hits_total{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_items` => `thanos_store_index_cache_items{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_items_size_bytes` => `thanos_store_index_cache_items_size_bytes{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_total_size_bytes` => `thanos_store_index_cache_total_size_bytes{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_memcached_operations_total` => `thanos_memcached_operations_total{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_memcached_operation_failures_total` => `thanos_memcached_operation_failures_total{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_memcached_operation_duration_seconds` => `thanos_memcached_operation_duration_seconds{name="index-cache"}`
+  * `cortex_<service>_blocks_index_cache_memcached_operation_skipped_total` => `thanos_memcached_operation_skipped_total{name="index-cache"}`
+* [CHANGE] Experimental TSDB: Renamed metrics in bucket stores: #2627
+  * `cortex_<service>_blocks_meta_syncs_total` => `cortex_blocks_meta_syncs_total{component="<service>"}`
+  * `cortex_<service>_blocks_meta_sync_failures_total` => `cortex_blocks_meta_sync_failures_total{component="<service>"}`
+  * `cortex_<service>_blocks_meta_sync_duration_seconds` => `cortex_blocks_meta_sync_duration_seconds{component="<service>"}`
+  * `cortex_<service>_blocks_meta_sync_consistency_delay_seconds` => `cortex_blocks_meta_sync_consistency_delay_seconds{component="<service>"}`
+  * `cortex_<service>_blocks_meta_synced` => `cortex_blocks_meta_synced{component="<service>"}`
+  * `cortex_<service>_bucket_store_block_loads_total` => `cortex_block_loads_total{component="<service>"}`
+  * `cortex_<service>_bucket_store_block_load_failures_total` => `cortex_block_load_failures_total{component="<service>"}`
+  * `cortex_<service>_bucket_store_block_drops_total` => `cortex_block_drops_total{component="<service>"}`
+  * `cortex_<service>_bucket_store_block_drop_failures_total` => `cortex_bucket_store_block_drop_failures_total{component="<service>"}`
+  * `cortex_<service>_bucket_store_blocks_loaded` => `cortex_bucket_store_blocks_loaded{component="<service>"}`
+  * `cortex_<service>_bucket_store_series_data_touched` => `cortex_bucket_store_series_data_touched{component="<service>"}`
+  * `cortex_<service>_bucket_store_series_data_fetched` => `cortex_bucket_store_series_data_fetched{component="<service>"}`
+  * `cortex_<service>_bucket_store_series_data_size_touched_bytes` => `cortex_bucket_store_series_data_size_touched_bytes{component="<service>"}`
+  * `cortex_<service>_bucket_store_series_data_size_fetched_bytes` => `cortex_bucket_store_series_data_size_fetched_bytes{component="<service>"}`
+  * `cortex_<service>_bucket_store_series_blocks_queried` => `cortex_bucket_store_series_blocks_queried{component="<service>"}`
+  * `cortex_<service>_bucket_store_series_get_all_duration_seconds` => `cortex_bucket_store_series_get_all_duration_seconds{component="<service>"}`
+  * `cortex_<service>_bucket_store_series_merge_duration_seconds` => `cortex_bucket_store_series_merge_duration_seconds{component="<service>"}`
+  * `cortex_<service>_bucket_store_series_refetches_total` => `cortex_bucket_store_series_refetches_total{component="<service>"}`
+  * `cortex_<service>_bucket_store_series_result_series` => `cortex_bucket_store_series_result_series{component="<service>"}`
+  * `cortex_<service>_bucket_store_cached_postings_compressions_total` => `cortex_bucket_store_cached_postings_compressions_total{component="<service>"}`
+  * `cortex_<service>_bucket_store_cached_postings_compression_errors_total` => `cortex_bucket_store_cached_postings_compression_errors_total{component="<service>"}`
+  * `cortex_<service>_bucket_store_cached_postings_compression_time_seconds` => `cortex_bucket_store_cached_postings_compression_time_seconds{component="<service>"}`
+  * `cortex_<service>_bucket_store_cached_postings_original_size_bytes_total` => `cortex_bucket_store_cached_postings_original_size_bytes_total{component="<service>"}`
+  * `cortex_<service>_bucket_store_cached_postings_compressed_size_bytes_total` => `cortex_bucket_store_cached_postings_compressed_size_bytes_total{component="<service>"}`
+  * `cortex_<service>_blocks_sync_seconds` => `cortex_bucket_stores_blocks_sync_seconds{component="<service>"}`
+  * `cortex_<service>_blocks_last_successful_sync_timestamp_seconds` => `cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="<service>"}`
 * [FEATURE] TLS config options added for GRPC clients in Querier (Query-frontend client & Ingester client), Ruler, Store Gateway, as well as HTTP client in Config store client. #2502
 * [FEATURE] The flag `-frontend.max-cache-freshness` is now supported within the limits overrides, to specify per-tenant max cache freshness values. The corresponding YAML config parameter has been changed from `results_cache.max_freshness` to `limits_config.max_cache_freshness`. The legacy YAML config parameter (`results_cache.max_freshness`) will continue to be supported till Cortex release `v1.4.0`. #2609
 * [FEATURE] Experimental gRPC Store: Added support for 3rd-party index and chunk stores using a gRPC client/server plugin mechanism. #2220
 
@@ -60,6 +60,25 @@ func (s *CompositeHTTPService) WaitSumMetrics(isExpected func(sums ...float64) b
 	return fmt.Errorf("unable to find metrics %s with expected values. Last values: %v", metricNames, sums)
 }
 
+func (s *CompositeHTTPService) WaitSumMetricWithLabels(isExpected func(sums float64) bool, metricName string, expectedLabels map[string]string) error {
+	lastSum := 0.0
+
+	for s.retryBackoff.Reset(); s.retryBackoff.Ongoing(); {
+		lastSum, err := s.SumMetricWithLabels(metricName, expectedLabels)
+		if err != nil {
+			return err
+		}
+
+		if isExpected(lastSum) {
+			return nil
+		}
+
+		s.retryBackoff.Wait()
+	}
+
+	return fmt.Errorf("unable to find metric %s with labels %v with expected value. Last value: %v", metricName, expectedLabels, lastSum)
+}
+
 // SumMetrics returns the sum of the values of each given metric names.
 func (s *CompositeHTTPService) SumMetrics(metricNames ...string) ([]float64, error) {
 	sums := make([]float64, len(metricNames))
@@ -81,3 +100,19 @@ func (s *CompositeHTTPService) SumMetrics(metricNames ...string) ([]float64, err
 
 	return sums, nil
 }
+
+// SumMetricWithLabels returns the sum of the values of metric with matching labels across all services.
+func (s *CompositeHTTPService) SumMetricWithLabels(metricName string, expectedLabels map[string]string) (float64, error) {
+	sum := 0.0
+
+	for _, service := range s.services {
+		s, err := service.SumMetricWithLabels(metricName, expectedLabels)
+		if err != nil {
+			return 0, err
+		}
+
+		sum += s
+	}
+
+	return sum, nil
+}
@@ -14,6 +14,7 @@ import (
 
 	"github.com/go-kit/kit/log"
 	"github.com/pkg/errors"
+	dto "github.com/prometheus/client_model/go"
 	"github.com/prometheus/common/expfmt"
 	"github.com/thanos-io/thanos/pkg/runutil"
 
@@ -575,44 +576,74 @@ func (s *HTTPService) SumMetrics(metricNames ...string) ([]float64, error) {
 // wait continues. If no such matching metric can be found or wait times out, function returns error.
 func (s *HTTPService) WaitForMetricWithLabels(okFn func(v float64) bool, metricName string, expectedLabels map[string]string) error {
 	for s.retryBackoff.Reset(); s.retryBackoff.Ongoing(); {
-		metrics, err := s.Metrics()
+		// Any error from the lookup (including a missing metric family) aborts the wait immediately.
+		ms, err := s.getMetricsMatchingLabels(metricName, expectedLabels)
 		if err != nil {
 			return err
 		}
 
-		var tp expfmt.TextParser
-		families, err := tp.TextToMetricFamilies(strings.NewReader(metrics))
-		if err != nil {
-			return err
+		// Succeed as soon as any one of the matching series satisfies okFn.
+		for _, m := range ms {
+			if okFn(getValue(m)) {
+				return nil
+			}
 		}
 
-		mf, ok := families[metricName]
-		if !ok {
-			return errors.Errorf("metric %s not found in %s metric page", metricName, s.name)
-		}
+		// No matching series was accepted by okFn; back off before retrying.
+		s.retryBackoff.Wait()
+	}
 
-		for _, m := range mf.GetMetric() {
-			// check if some metric has all required labels
-			metricLabels := map[string]string{}
-			for _, lp := range m.GetLabel() {
-				metricLabels[lp.GetName()] = lp.GetValue()
-			}
+	// The retry backoff ran out without okFn accepting any matching value.
+	return fmt.Errorf("unable to find metric %s with labels %v with expected value", metricName, expectedLabels)
+}
 
-			matches := true
-			for k, v := range expectedLabels {
-				if mv, ok := metricLabels[k]; !ok || mv != v {
-					matches = false
-					break
-				}
-			}
+// SumMetricWithLabels returns the sum of the values of all series of metricName
+// that getMetricsMatchingLabels selects for expectedLabels. Any error from that
+// lookup is returned together with a zero sum. If no series is selected, the
+// sum is 0.
+func (s *HTTPService) SumMetricWithLabels(metricName string, expectedLabels map[string]string) (float64, error) {
+	sum := 0.0
+	ms, err := s.getMetricsMatchingLabels(metricName, expectedLabels)
+	if err != nil {
+		return 0, err
+	}
 
-			if matches && okFn(getValue(m)) {
-				return nil
+	for _, m := range ms {
+		sum += getValue(m)
+	}
+	return sum, nil
+}
+
+// getMetricsMatchingLabels fetches the service's metrics via s.Metrics, parses them
+// with expfmt.TextParser, and returns the series of the metricName family whose labels
+// contain every name/value pair in expectedLabels (additional labels on a series are
+// ignored). It returns an error if fetching or parsing fails, or if no family named
+// metricName is present. If the family exists but no series matches, the result is nil.
+func (s *HTTPService) getMetricsMatchingLabels(metricName string, expectedLabels map[string]string) ([]*dto.Metric, error) {
+	metrics, err := s.Metrics()
+	if err != nil {
+		return nil, err
+	}
+
+	var tp expfmt.TextParser
+	families, err := tp.TextToMetricFamilies(strings.NewReader(metrics))
+	if err != nil {
+		return nil, err
+	}
+
+	mf, ok := families[metricName]
+	if !ok {
+		return nil, errors.Errorf("metric %s not found in %s metric page", metricName, s.name)
+	}
+
+	result := []*dto.Metric(nil)
+
+	for _, m := range mf.GetMetric() {
+		// Index this series' labels by name so each expected label is a single lookup.
+		metricLabels := map[string]string{}
+		for _, lp := range m.GetLabel() {
+			metricLabels[lp.GetName()] = lp.GetValue()
+		}
+
+		// A series matches only if every expected label is present with the same value.
+		matches := true
+		for k, v := range expectedLabels {
+			if mv, ok := metricLabels[k]; !ok || mv != v {
+				matches = false
+				break
 			}
 		}
 
-		s.retryBackoff.Wait()
+		if matches {
+			result = append(result, m)
+		}
 	}
-
-	return fmt.Errorf("unable to find metric %s with labels %v with expected value", metricName, expectedLabels)
+	return result, nil
 }