Skip to content

Commit d8230cf

Browse files
committed
check context in multi level cache and add histogram to measure latency
Signed-off-by: Ben Ye <[email protected]>

add backfill histogram

Signed-off-by: Ben Ye <[email protected]>

update changelog

Signed-off-by: Ben Ye <[email protected]>

lint

Signed-off-by: Ben Ye <[email protected]>
1 parent f16bb49 commit d8230cf

File tree

4 files changed

+58
-5
lines changed

4 files changed

+58
-5
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
* [ENHANCEMENT] Querier: Check context before notifying scheduler and frontend. #5565
7171
* [ENHANCEMENT] QueryFrontend: Add metric for number of series requests. #5373
7272
* [ENHANCEMENT] Store Gateway: Add histogram metrics for total time spent fetching series and chunks per request. #5573
73+
* [ENHANCEMENT] Store Gateway: Check context in multi level cache. Add `cortex_store_multilevel_index_cache_fetch_duration_seconds` and `cortex_store_multilevel_index_cache_backfill_duration_seconds` to measure fetch and backfill latency. #5596
7374
* [BUGFIX] Ruler: Validate if rule group can be safely converted back to rule group yaml from protobuf message #5265
7475
* [BUGFIX] Querier: Convert gRPC `ResourceExhausted` status code from store gateway to 422 limit error. #5286
7576
* [BUGFIX] Alertmanager: Route web-ui requests to the alertmanager distributor when sharding is enabled. #5293

pkg/storage/tsdb/index_cache.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,7 @@ func NewIndexCache(cfg IndexCacheConfig, logger log.Logger, registerer prometheu
205205
}
206206
}
207207

208-
return newMultiLevelCache(caches...), nil
208+
return newMultiLevelCache(registerer, caches...), nil
209209
}
210210

211211
func newInMemoryIndexCache(cfg InMemoryIndexCacheConfig, logger log.Logger, registerer prometheus.Registerer) (storecache.IndexCache, error) {

pkg/storage/tsdb/multilevel_cache.go

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,24 @@ import (
55
"sync"
66

77
"github.com/oklog/ulid"
8+
"github.com/prometheus/client_golang/prometheus"
9+
"github.com/prometheus/client_golang/prometheus/promauto"
810
"github.com/prometheus/prometheus/model/labels"
911
"github.com/prometheus/prometheus/storage"
1012
storecache "github.com/thanos-io/thanos/pkg/store/cache"
1113
)
1214

15+
const (
16+
cacheTypePostings string = "Postings"
17+
cacheTypeExpandedPostings string = "ExpandedPostings"
18+
cacheTypeSeries string = "Series"
19+
)
20+
1321
type multiLevelCache struct {
1422
caches []storecache.IndexCache
23+
24+
fetchLatency *prometheus.HistogramVec
25+
backFillLatency *prometheus.HistogramVec
1526
}
1627

1728
func (m *multiLevelCache) StorePostings(blockID ulid.ULID, l labels.Label, v []byte, tenant string) {
@@ -28,11 +39,17 @@ func (m *multiLevelCache) StorePostings(blockID ulid.ULID, l labels.Label, v []b
2839
}
2940

3041
func (m *multiLevelCache) FetchMultiPostings(ctx context.Context, blockID ulid.ULID, keys []labels.Label, tenant string) (hits map[labels.Label][]byte, misses []labels.Label) {
42+
timer := prometheus.NewTimer(m.fetchLatency.WithLabelValues(cacheTypePostings))
43+
defer timer.ObserveDuration()
44+
3145
misses = keys
3246
hits = map[labels.Label][]byte{}
3347
backfillMap := map[storecache.IndexCache][]map[labels.Label][]byte{}
3448
for i, c := range m.caches {
3549
backfillMap[c] = []map[labels.Label][]byte{}
50+
if ctx.Err() != nil {
51+
return
52+
}
3653
h, mi := c.FetchMultiPostings(ctx, blockID, misses, tenant)
3754
misses = mi
3855

@@ -50,9 +67,14 @@ func (m *multiLevelCache) FetchMultiPostings(ctx context.Context, blockID ulid.U
5067
}
5168

5269
defer func() {
70+
backFillTimer := prometheus.NewTimer(m.backFillLatency.WithLabelValues(cacheTypePostings))
71+
defer backFillTimer.ObserveDuration()
5372
for cache, hit := range backfillMap {
5473
for _, values := range hit {
5574
for l, b := range values {
75+
if ctx.Err() != nil {
76+
return
77+
}
5678
cache.StorePostings(blockID, l, b, tenant)
5779
}
5880
}
@@ -76,10 +98,18 @@ func (m *multiLevelCache) StoreExpandedPostings(blockID ulid.ULID, matchers []*l
7698
}
7799

78100
func (m *multiLevelCache) FetchExpandedPostings(ctx context.Context, blockID ulid.ULID, matchers []*labels.Matcher, tenant string) ([]byte, bool) {
101+
timer := prometheus.NewTimer(m.fetchLatency.WithLabelValues(cacheTypeExpandedPostings))
102+
defer timer.ObserveDuration()
103+
79104
for i, c := range m.caches {
105+
if ctx.Err() != nil {
106+
return nil, false
107+
}
80108
if d, h := c.FetchExpandedPostings(ctx, blockID, matchers, tenant); h {
81109
if i > 0 {
110+
backFillTimer := prometheus.NewTimer(m.backFillLatency.WithLabelValues(cacheTypeExpandedPostings))
82111
m.caches[i-1].StoreExpandedPostings(blockID, matchers, d, tenant)
112+
backFillTimer.ObserveDuration()
83113
}
84114
return d, h
85115
}
@@ -102,12 +132,18 @@ func (m *multiLevelCache) StoreSeries(blockID ulid.ULID, id storage.SeriesRef, v
102132
}
103133

104134
func (m *multiLevelCache) FetchMultiSeries(ctx context.Context, blockID ulid.ULID, ids []storage.SeriesRef, tenant string) (hits map[storage.SeriesRef][]byte, misses []storage.SeriesRef) {
135+
timer := prometheus.NewTimer(m.fetchLatency.WithLabelValues(cacheTypeSeries))
136+
defer timer.ObserveDuration()
137+
105138
misses = ids
106139
hits = map[storage.SeriesRef][]byte{}
107140
backfillMap := map[storecache.IndexCache][]map[storage.SeriesRef][]byte{}
108141

109142
for i, c := range m.caches {
110143
backfillMap[c] = []map[storage.SeriesRef][]byte{}
144+
if ctx.Err() != nil {
145+
return
146+
}
111147
h, miss := c.FetchMultiSeries(ctx, blockID, misses, tenant)
112148
misses = miss
113149

@@ -125,9 +161,14 @@ func (m *multiLevelCache) FetchMultiSeries(ctx context.Context, blockID ulid.ULI
125161
}
126162

127163
defer func() {
164+
backFillTimer := prometheus.NewTimer(m.backFillLatency.WithLabelValues(cacheTypeSeries))
165+
defer backFillTimer.ObserveDuration()
128166
for cache, hit := range backfillMap {
129167
for _, values := range hit {
130168
for m, b := range values {
169+
if ctx.Err() != nil {
170+
return
171+
}
131172
cache.StoreSeries(blockID, m, b, tenant)
132173
}
133174
}
@@ -137,11 +178,21 @@ func (m *multiLevelCache) FetchMultiSeries(ctx context.Context, blockID ulid.ULI
137178
return hits, misses
138179
}
139180

140-
func newMultiLevelCache(c ...storecache.IndexCache) storecache.IndexCache {
181+
func newMultiLevelCache(reg prometheus.Registerer, c ...storecache.IndexCache) storecache.IndexCache {
141182
if len(c) == 1 {
142183
return c[0]
143184
}
144185
return &multiLevelCache{
145186
caches: c,
187+
fetchLatency: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
188+
Name: "cortex_store_multilevel_index_cache_fetch_duration_seconds",
189+
Help: "Histogram to track latency to fetch items from multi level index cache",
190+
Buckets: []float64{0.01, 0.1, 0.3, 0.6, 1, 3, 6, 10, 15, 20, 25, 30, 40, 50, 60, 90},
191+
}, []string{"item_type"}),
192+
backFillLatency: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{
193+
Name: "cortex_store_multilevel_index_cache_backfill_duration_seconds",
194+
Help: "Histogram to track latency to backfill items from multi level index cache",
195+
Buckets: []float64{0.01, 0.1, 0.3, 0.6, 1, 3, 6, 10, 15, 20, 25, 30, 40, 50, 60, 90},
196+
}, []string{"item_type"}),
146197
}
147198
}

pkg/storage/tsdb/multilevel_cache_test.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ func Test_MultiIndexCacheInstantiation(t *testing.T) {
4343
},
4444
},
4545
},
46-
expectedType: newMultiLevelCache(),
46+
expectedType: &multiLevelCache{},
4747
},
4848
"instantiate multiples backends - inmemory/memcached": {
4949
cfg: IndexCacheConfig{
@@ -55,7 +55,7 @@ func Test_MultiIndexCacheInstantiation(t *testing.T) {
5555
},
5656
},
5757
},
58-
expectedType: newMultiLevelCache(),
58+
expectedType: &multiLevelCache{},
5959
},
6060
"should not allow duplicate backends": {
6161
cfg: IndexCacheConfig{
@@ -256,7 +256,8 @@ func Test_MultiLevelCache(t *testing.T) {
256256
t.Run(name, func(t *testing.T) {
257257
m1 := newMockIndexCache(tc.m1MockedCalls)
258258
m2 := newMockIndexCache(tc.m2MockedCalls)
259-
c := newMultiLevelCache(m1, m2)
259+
reg := prometheus.NewRegistry()
260+
c := newMultiLevelCache(reg, m1, m2)
260261
tc.call(c)
261262
require.Equal(t, tc.m1ExpectedCalls, m1.calls)
262263
require.Equal(t, tc.m2ExpectedCalls, m2.calls)

0 commit comments

Comments (0)