Skip to content

Commit b129d25

Browse files
authored
Don't prefix Thanos index-cache metrics (#2627)
* Removed Cortex-specific metrics for index cache, reuse Thanos metrics (with name="index-cache", and appropriate component) Signed-off-by: Peter Štibraný <[email protected]> * Don't prefix metrics, but use new label instead. Signed-off-by: Peter Štibraný <[email protected]> * Fix tests. Signed-off-by: Peter Štibraný <[email protected]> * Fixed tests after renaming metrics. Signed-off-by: Peter Štibraný <[email protected]> * Add cortex_ prefix to metrics (re-)defined in Cortex Signed-off-by: Peter Štibraný <[email protected]> * Fix integration tests. Signed-off-by: Peter Štibraný <[email protected]> * Fix comment. Signed-off-by: Peter Štibraný <[email protected]> * Fix component name. Signed-off-by: Peter Štibraný <[email protected]> * Added PR number to CHANGELOG.md entries. Signed-off-by: Peter Štibraný <[email protected]>
1 parent 90eceb9 commit b129d25

17 files changed

+411
-798
lines changed

CHANGELOG.md

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,46 @@
55
* [CHANGE] Query Frontend now uses Round Robin to choose a tenant queue to service next. #2553
66
* [CHANGE] `-promql.lookback-delta` is now deprecated and has been replaced by `-querier.lookback-delta` along with `lookback_delta` entry under `querier` in the config file. `-promql.lookback-delta` will be removed in v1.4.0. #2604
77
* [CHANGE] Experimental TSDB: removed `-experimental.tsdb.bucket-store.binary-index-header-enabled` flag. Now the binary index-header is always enabled.
8+
* [CHANGE] Experimental TSDB: Renamed index-cache metrics to use original metric names from Thanos, as Cortex is not aggregating them in any way: #2627
9+
* `cortex_<service>_blocks_index_cache_items_evicted_total` => `thanos_store_index_cache_items_evicted_total{name="index-cache"}`
10+
* `cortex_<service>_blocks_index_cache_items_added_total` => `thanos_store_index_cache_items_added_total{name="index-cache"}`
11+
* `cortex_<service>_blocks_index_cache_requests_total` => `thanos_store_index_cache_requests_total{name="index-cache"}`
12+
* `cortex_<service>_blocks_index_cache_items_overflowed_total` => `thanos_store_index_cache_items_overflowed_total{name="index-cache"}`
13+
* `cortex_<service>_blocks_index_cache_hits_total` => `thanos_store_index_cache_hits_total{name="index-cache"}`
14+
* `cortex_<service>_blocks_index_cache_items` => `thanos_store_index_cache_items{name="index-cache"}`
15+
* `cortex_<service>_blocks_index_cache_items_size_bytes` => `thanos_store_index_cache_items_size_bytes{name="index-cache"}`
16+
* `cortex_<service>_blocks_index_cache_total_size_bytes` => `thanos_store_index_cache_total_size_bytes{name="index-cache"}`
17+
* `cortex_<service>_blocks_index_cache_memcached_operations_total` => `thanos_memcached_operations_total{name="index-cache"}`
18+
* `cortex_<service>_blocks_index_cache_memcached_operation_failures_total` => `thanos_memcached_operation_failures_total{name="index-cache"}`
19+
* `cortex_<service>_blocks_index_cache_memcached_operation_duration_seconds` => `thanos_memcached_operation_duration_seconds{name="index-cache"}`
20+
* `cortex_<service>_blocks_index_cache_memcached_operation_skipped_total` => `thanos_memcached_operation_skipped_total{name="index-cache"}`
21+
* [CHANGE] Experimental TSDB: Renamed metrics in bucket stores: #2627
22+
* `cortex_<service>_blocks_meta_syncs_total` => `cortex_blocks_meta_syncs_total{component="<service>"}`
23+
* `cortex_<service>_blocks_meta_sync_failures_total` => `cortex_blocks_meta_sync_failures_total{component="<service>"}`
24+
* `cortex_<service>_blocks_meta_sync_duration_seconds` => `cortex_blocks_meta_sync_duration_seconds{component="<service>"}`
25+
* `cortex_<service>_blocks_meta_sync_consistency_delay_seconds` => `cortex_blocks_meta_sync_consistency_delay_seconds{component="<service>"}`
26+
* `cortex_<service>_blocks_meta_synced` => `cortex_blocks_meta_synced{component="<service>"}`
27+
* `cortex_<service>_bucket_store_block_loads_total` => `cortex_bucket_store_block_loads_total{component="<service>"}`
28+
* `cortex_<service>_bucket_store_block_load_failures_total` => `cortex_bucket_store_block_load_failures_total{component="<service>"}`
29+
* `cortex_<service>_bucket_store_block_drops_total` => `cortex_bucket_store_block_drops_total{component="<service>"}`
30+
* `cortex_<service>_bucket_store_block_drop_failures_total` => `cortex_bucket_store_block_drop_failures_total{component="<service>"}`
31+
* `cortex_<service>_bucket_store_blocks_loaded` => `cortex_bucket_store_blocks_loaded{component="<service>"}`
32+
* `cortex_<service>_bucket_store_series_data_touched` => `cortex_bucket_store_series_data_touched{component="<service>"}`
33+
* `cortex_<service>_bucket_store_series_data_fetched` => `cortex_bucket_store_series_data_fetched{component="<service>"}`
34+
* `cortex_<service>_bucket_store_series_data_size_touched_bytes` => `cortex_bucket_store_series_data_size_touched_bytes{component="<service>"}`
35+
* `cortex_<service>_bucket_store_series_data_size_fetched_bytes` => `cortex_bucket_store_series_data_size_fetched_bytes{component="<service>"}`
36+
* `cortex_<service>_bucket_store_series_blocks_queried` => `cortex_bucket_store_series_blocks_queried{component="<service>"}`
37+
* `cortex_<service>_bucket_store_series_get_all_duration_seconds` => `cortex_bucket_store_series_get_all_duration_seconds{component="<service>"}`
38+
* `cortex_<service>_bucket_store_series_merge_duration_seconds` => `cortex_bucket_store_series_merge_duration_seconds{component="<service>"}`
39+
* `cortex_<service>_bucket_store_series_refetches_total` => `cortex_bucket_store_series_refetches_total{component="<service>"}`
40+
* `cortex_<service>_bucket_store_series_result_series` => `cortex_bucket_store_series_result_series{component="<service>"}`
41+
* `cortex_<service>_bucket_store_cached_postings_compressions_total` => `cortex_bucket_store_cached_postings_compressions_total{component="<service>"}`
42+
* `cortex_<service>_bucket_store_cached_postings_compression_errors_total` => `cortex_bucket_store_cached_postings_compression_errors_total{component="<service>"}`
43+
* `cortex_<service>_bucket_store_cached_postings_compression_time_seconds` => `cortex_bucket_store_cached_postings_compression_time_seconds{component="<service>"}`
44+
* `cortex_<service>_bucket_store_cached_postings_original_size_bytes_total` => `cortex_bucket_store_cached_postings_original_size_bytes_total{component="<service>"}`
45+
* `cortex_<service>_bucket_store_cached_postings_compressed_size_bytes_total` => `cortex_bucket_store_cached_postings_compressed_size_bytes_total{component="<service>"}`
46+
* `cortex_<service>_blocks_sync_seconds` => `cortex_bucket_stores_blocks_sync_seconds{component="<service>"}`
47+
* `cortex_<service>_blocks_last_successful_sync_timestamp_seconds` => `cortex_bucket_stores_blocks_last_successful_sync_timestamp_seconds{component="<service>"}`
848
* [FEATURE] TLS config options added for GRPC clients in Querier (Query-frontend client & Ingester client), Ruler, Store Gateway, as well as HTTP client in Config store client. #2502
949
* [FEATURE] The flag `-frontend.max-cache-freshness` is now supported within the limits overrides, to specify per-tenant max cache freshness values. The corresponding YAML config parameter has been changed from `results_cache.max_freshness` to `limits_config.max_cache_freshness`. The legacy YAML config parameter (`results_cache.max_freshness`) will continue to be supported till Cortex release `v1.4.0`. #2609
1050
* [FEATURE] Experimental gRPC Store: Added support to 3rd parties index and chunk stores using gRPC client/server plugin mechanism. #2220

integration/e2e/composite_service.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,25 @@ func (s *CompositeHTTPService) WaitSumMetrics(isExpected func(sums ...float64) b
6060
return fmt.Errorf("unable to find metrics %s with expected values. Last values: %v", metricNames, sums)
6161
}
6262

63+
func (s *CompositeHTTPService) WaitSumMetricWithLabels(isExpected func(sums float64) bool, metricName string, expectedLabels map[string]string) error {
64+
lastSum := 0.0
65+
66+
for s.retryBackoff.Reset(); s.retryBackoff.Ongoing(); {
67+
lastSum, err := s.SumMetricWithLabels(metricName, expectedLabels)
68+
if err != nil {
69+
return err
70+
}
71+
72+
if isExpected(lastSum) {
73+
return nil
74+
}
75+
76+
s.retryBackoff.Wait()
77+
}
78+
79+
return fmt.Errorf("unable to find metric %s with labels %v with expected value. Last value: %v", metricName, expectedLabels, lastSum)
80+
}
81+
6382
// SumMetrics returns the sum of the values of each given metric names.
6483
func (s *CompositeHTTPService) SumMetrics(metricNames ...string) ([]float64, error) {
6584
sums := make([]float64, len(metricNames))
@@ -81,3 +100,19 @@ func (s *CompositeHTTPService) SumMetrics(metricNames ...string) ([]float64, err
81100

82101
return sums, nil
83102
}
103+
104+
// SumMetricWithLabels returns the sum of the values of metric with matching labels across all services.
105+
func (s *CompositeHTTPService) SumMetricWithLabels(metricName string, expectedLabels map[string]string) (float64, error) {
106+
sum := 0.0
107+
108+
for _, service := range s.services {
109+
s, err := service.SumMetricWithLabels(metricName, expectedLabels)
110+
if err != nil {
111+
return 0, err
112+
}
113+
114+
sum += s
115+
}
116+
117+
return sum, nil
118+
}

integration/e2e/service.go

Lines changed: 58 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414

1515
"github.com/go-kit/kit/log"
1616
"github.com/pkg/errors"
17+
dto "github.com/prometheus/client_model/go"
1718
"github.com/prometheus/common/expfmt"
1819
"github.com/thanos-io/thanos/pkg/runutil"
1920

@@ -575,44 +576,74 @@ func (s *HTTPService) SumMetrics(metricNames ...string) ([]float64, error) {
575576
// wait continues. If no such matching metric can be found or wait times out, function returns error.
576577
func (s *HTTPService) WaitForMetricWithLabels(okFn func(v float64) bool, metricName string, expectedLabels map[string]string) error {
577578
for s.retryBackoff.Reset(); s.retryBackoff.Ongoing(); {
578-
metrics, err := s.Metrics()
579+
ms, err := s.getMetricsMatchingLabels(metricName, expectedLabels)
579580
if err != nil {
580581
return err
581582
}
582583

583-
var tp expfmt.TextParser
584-
families, err := tp.TextToMetricFamilies(strings.NewReader(metrics))
585-
if err != nil {
586-
return err
584+
for _, m := range ms {
585+
if okFn(getValue(m)) {
586+
return nil
587+
}
587588
}
588589

589-
mf, ok := families[metricName]
590-
if !ok {
591-
return errors.Errorf("metric %s not found in %s metric page", metricName, s.name)
592-
}
590+
s.retryBackoff.Wait()
591+
}
593592

594-
for _, m := range mf.GetMetric() {
595-
// check if some metric has all required labels
596-
metricLabels := map[string]string{}
597-
for _, lp := range m.GetLabel() {
598-
metricLabels[lp.GetName()] = lp.GetValue()
599-
}
593+
return fmt.Errorf("unable to find metric %s with labels %v with expected value", metricName, expectedLabels)
594+
}
600595

601-
matches := true
602-
for k, v := range expectedLabels {
603-
if mv, ok := metricLabels[k]; !ok || mv != v {
604-
matches = false
605-
break
606-
}
607-
}
596+
// SumMetricWithLabels returns the sum of the values (as extracted by getValue)
// of all metricName samples that getMetricsMatchingLabels reports as matching
// expectedLabels. If that lookup fails, 0 and the lookup error are returned;
// if the lookup succeeds but yields no samples, the result is 0 with a nil error.
597+
func (s *HTTPService) SumMetricWithLabels(metricName string, expectedLabels map[string]string) (float64, error) {
598+
sum := 0.0
599+
ms, err := s.getMetricsMatchingLabels(metricName, expectedLabels)
600+
if err != nil {
601+
return 0, err
602+
}
608603

609-
if matches && okFn(getValue(m)) {
610-
return nil
604+
for _, m := range ms {
605+
sum += getValue(m)
606+
}
607+
return sum, nil
608+
}
609+
610+
// getMetricsMatchingLabels fetches the service's metrics page via s.Metrics(),
// parses it with the expfmt text parser and returns every sample of the
// metricName family whose label set contains all name/value pairs listed in
// expectedLabels (samples may carry additional labels).
//
// An error is returned if the metrics cannot be fetched or parsed, or if no
// family named metricName exists on the page. When the family exists but no
// sample matches, the returned slice is nil and the error is nil.
func (s *HTTPService) getMetricsMatchingLabels(metricName string, expectedLabels map[string]string) ([]*dto.Metric, error) {
611+
metrics, err := s.Metrics()
612+
if err != nil {
613+
return nil, err
614+
}
615+
616+
var tp expfmt.TextParser
617+
families, err := tp.TextToMetricFamilies(strings.NewReader(metrics))
618+
if err != nil {
619+
return nil, err
620+
}
621+
622+
mf, ok := families[metricName]
623+
if !ok {
624+
return nil, errors.Errorf("metric %s not found in %s metric page", metricName, s.name)
625+
}
626+
627+
result := []*dto.Metric(nil)
628+
629+
for _, m := range mf.GetMetric() {
630+
// Index this sample's labels by name so each expected label can be looked up directly.
631+
metricLabels := map[string]string{}
632+
for _, lp := range m.GetLabel() {
633+
metricLabels[lp.GetName()] = lp.GetValue()
634+
}
635+
636+
matches := true
637+
for k, v := range expectedLabels {
638+
if mv, ok := metricLabels[k]; !ok || mv != v {
639+
matches = false
640+
break
611641
}
612642
}
613643

614-
s.retryBackoff.Wait()
644+
if matches {
645+
result = append(result, m)
646+
}
615647
}
616-
617-
return fmt.Errorf("unable to find metric %s with labels %v with expected value", metricName, expectedLabels)
648+
return result, nil
618649
}

0 commit comments

Comments
 (0)