Skip to content

Commit b7a3a5d

Browse files
authored
Add more duration info to SG request stats log (#5573)
* include get_all, merge and total latency to store gateway series request log Signed-off-by: Ben Ye <[email protected]> * update thanos and add metrics to cortex Signed-off-by: Ben Ye <[email protected]> * update changelog Signed-off-by: Ben Ye <[email protected]> * update again Signed-off-by: Ben Ye <[email protected]> --------- Signed-off-by: Ben Ye <[email protected]>
1 parent 4e162a0 commit b7a3a5d

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

51 files changed

+594
-1818
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
* [ENHANCEMENT] Querier: Retry store gateway client connection closing gRPC error. #5558
6565
* [ENHANCEMENT] QueryFrontend: Add generic retry for all APIs. #5561.
6666
* [ENHANCEMENT] QueryFrontend: Add metric for number of series requests. #5373
67+
* [ENHANCEMENT] Store Gateway: Add histogram metrics for total time spent fetching series and chunks per request. #5573
6768
* [BUGFIX] Ruler: Validate if rule group can be safely converted back to rule group yaml from protobuf message #5265
6869
* [BUGFIX] Querier: Convert gRPC `ResourceExhausted` status code from store gateway to 422 limit error. #5286
6970
* [BUGFIX] Alertmanager: Route web-ui requests to the alertmanager distributor when sharding is enabled. #5293

go.mod

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -51,9 +51,9 @@ require (
5151
github.com/sony/gobreaker v0.5.0
5252
github.com/spf13/afero v1.9.5
5353
github.com/stretchr/testify v1.8.4
54-
github.com/thanos-io/objstore v0.0.0-20230816175749-20395bffdf26
54+
github.com/thanos-io/objstore v0.0.0-20230921130928-63a603e651ed
5555
github.com/thanos-io/promql-engine v0.0.0-20230821193351-e1ae4275b96e
56-
github.com/thanos-io/thanos v0.32.3-0.20230911095949-f6a39507b6bd
56+
github.com/thanos-io/thanos v0.32.4-0.20230921182036-6257767ec9d0
5757
github.com/uber/jaeger-client-go v2.30.0+incompatible
5858
github.com/weaveworks/common v0.0.0-20221201103051-7c2720a9024d
5959
go.etcd.io/etcd/api/v3 v3.5.9
@@ -117,7 +117,6 @@ require (
117117
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
118118
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
119119
github.com/dennwc/varint v1.0.0 // indirect
120-
github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 // indirect
121120
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect
122121
github.com/docker/go-units v0.5.0 // indirect
123122
github.com/edsrzf/mmap-go v1.1.0 // indirect
@@ -190,7 +189,6 @@ require (
190189
github.com/rs/cors v1.9.0 // indirect
191190
github.com/rs/xid v1.5.0 // indirect
192191
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 // indirect
193-
github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb // indirect
194192
github.com/sercand/kuberesolver v2.4.0+incompatible // indirect
195193
github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c // indirect
196194
github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546 // indirect
@@ -221,7 +219,7 @@ require (
221219
golang.org/x/exp v0.0.0-20230713183714-613f0c0eb8a1 // indirect
222220
golang.org/x/mod v0.12.0 // indirect
223221
golang.org/x/oauth2 v0.11.0 // indirect
224-
golang.org/x/sys v0.11.0 // indirect
222+
golang.org/x/sys v0.12.0 // indirect
225223
golang.org/x/text v0.12.0 // indirect
226224
golang.org/x/tools v0.11.0 // indirect
227225
golang.org/x/xerrors v0.0.0-20220907171357-04be3eba64a2 // indirect

go.sum

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -549,8 +549,6 @@ github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1
549549
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
550550
github.com/dennwc/varint v1.0.0 h1:kGNFFSSw8ToIy3obO/kKr8U9GZYUAxQEVuix4zfDWzE=
551551
github.com/dennwc/varint v1.0.0/go.mod h1:hnItb35rvZvJrbTALZtY/iQfDs48JKRG1RPpgziApxA=
552-
github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165 h1:BS21ZUJ/B5X2UVUbczfmdWH7GapPWAhxcMsDnjJTU1E=
553-
github.com/dgryski/go-metro v0.0.0-20200812162917-85c65e2d0165/go.mod h1:c9O8+fpSOX1DM8cPNSkX/qsBWdkD4yd2dpciOWQjpBw=
554552
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78=
555553
github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc=
556554
github.com/dhui/dktest v0.3.16 h1:i6gq2YQEtcrjKbeJpBkWjE8MmLZPYllcjOFbTZuPDnw=
@@ -1158,8 +1156,6 @@ github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUt
11581156
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
11591157
github.com/segmentio/fasthash v1.0.3 h1:EI9+KE1EwvMLBWwjpRDc+fEM+prwxDYbslddQGtrmhM=
11601158
github.com/segmentio/fasthash v1.0.3/go.mod h1:waKX8l2N8yckOgmSsXJi7x1ZfdKZ4x7KRMzBtS3oedY=
1161-
github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb h1:XfLJSPIOUX+osiMraVgIrMR27uMXnRJWGm1+GL8/63U=
1162-
github.com/seiflotfy/cuckoofilter v0.0.0-20220411075957-e3b120b3f5fb/go.mod h1:bR6DqgcAl1zTcOX8/pE2Qkj9XO00eCNqmKb7lXP8EAg=
11631159
github.com/sercand/kuberesolver/v4 v4.0.0 h1:frL7laPDG/lFm5n98ODmWnn+cvPpzlkf3LhzuPhcHP4=
11641160
github.com/sercand/kuberesolver/v4 v4.0.0/go.mod h1:F4RGyuRmMAjeXHKL+w4P7AwUnPceEAPAhxUgXZjKgvM=
11651161
github.com/shurcooL/httpfs v0.0.0-20230704072500-f1e31cf0ba5c h1:aqg5Vm5dwtvL+YgDpBcK1ITf3o96N/K7/wsRXQnUTEs=
@@ -1212,12 +1208,12 @@ github.com/subosito/gotenv v1.4.1/go.mod h1:ayKnFf/c6rvx/2iiLrJUk1e6plDbT3edrFNG
12121208
github.com/tencentyun/cos-go-sdk-v5 v0.7.40 h1:W6vDGKCHe4wBACI1d2UgE6+50sJFhRWU4O8IB2ozzxM=
12131209
github.com/thanos-community/galaxycache v0.0.0-20211122094458-3a32041a1f1e h1:f1Zsv7OAU9iQhZwigp50Yl38W10g/vd5NC8Rdk1Jzng=
12141210
github.com/thanos-community/galaxycache v0.0.0-20211122094458-3a32041a1f1e/go.mod h1:jXcofnrSln/cLI6/dhlBxPQZEEQHVPCcFaH75M+nSzM=
1215-
github.com/thanos-io/objstore v0.0.0-20230816175749-20395bffdf26 h1:q1lin/af0lw+I3sS79ccHs2CLjFOPc190J9saeQ5qQ4=
1216-
github.com/thanos-io/objstore v0.0.0-20230816175749-20395bffdf26/go.mod h1:oJ82xgcBDzGJrEgUsjlTj6n01+ZWUMMUR8BlZzX5xDE=
1211+
github.com/thanos-io/objstore v0.0.0-20230921130928-63a603e651ed h1:iWQdY3S6DpWjelVvKKSKgS7LeLkhK4VaEnQfphB9ZXA=
1212+
github.com/thanos-io/objstore v0.0.0-20230921130928-63a603e651ed/go.mod h1:oJ82xgcBDzGJrEgUsjlTj6n01+ZWUMMUR8BlZzX5xDE=
12171213
github.com/thanos-io/promql-engine v0.0.0-20230821193351-e1ae4275b96e h1:kwsFCU8eSkZehbrAN3nXPw5RdMHi/Bok/y8l2C4M+gk=
12181214
github.com/thanos-io/promql-engine v0.0.0-20230821193351-e1ae4275b96e/go.mod h1:+T/ZYNCGybT6eTsGGvVtGb63nT1cvUmH6MjqRrcQoKw=
1219-
github.com/thanos-io/thanos v0.32.3-0.20230911095949-f6a39507b6bd h1:JAXqwb/nzY7WzijekZrhrL63m988VLyoFUEaKLU15iA=
1220-
github.com/thanos-io/thanos v0.32.3-0.20230911095949-f6a39507b6bd/go.mod h1:J81dp4qaOX+GfPmRoYqu/aZXfEBri7+i3TzY2xamthg=
1215+
github.com/thanos-io/thanos v0.32.4-0.20230921182036-6257767ec9d0 h1:T9Vot+BQao6M6j8F0JQbseAqtniOw1Csz+QHRRRwF48=
1216+
github.com/thanos-io/thanos v0.32.4-0.20230921182036-6257767ec9d0/go.mod h1:Px5Boq60s+2WwR+V4v4oxgmxfw9WHrwMwjRou6pkUNw=
12211217
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab h1:7ZR3hmisBWw77ZpO1/o86g+JV3VKlk3d48jopJxzTjU=
12221218
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab/go.mod h1:eheTFp954zcWZXCU8d0AT76ftsQOTo4DTqkN/h3k1MY=
12231219
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
@@ -1607,8 +1603,8 @@ golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
16071603
golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
16081604
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
16091605
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
1610-
golang.org/x/sys v0.11.0 h1:eG7RXZHdqOJ1i+0lgLgCpSXAp6M3LYlAo6osgSi0xOM=
1611-
golang.org/x/sys v0.11.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
1606+
golang.org/x/sys v0.12.0 h1:CM0HF96J0hcLAwsHPJZjfdNzs0gftsLfgKt57wWHJ0o=
1607+
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
16121608
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
16131609
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
16141610
golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=

pkg/querier/blocks_store_queryable.go

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -609,6 +609,7 @@ func (q *blocksStoreQuerier) fetchSeriesFromStores(
609609
return errors.Wrapf(err, "failed to create series request")
610610
}
611611

612+
begin := time.Now()
612613
stream, err := c.Series(gCtx, req)
613614
if err != nil {
614615
if isRetryableError(err) {
@@ -725,10 +726,10 @@ func (q *blocksStoreQuerier) fetchSeriesFromStores(
725726
"requested blocks", strings.Join(convertULIDsToString(blockIDs), " "),
726727
"queried blocks", strings.Join(convertULIDsToString(myQueriedBlocks), " "))
727728

728-
// It is also interesting to look at data downloaded at store gateway even if
729-
// no series got matched, but to reduce verbosity we are more interested in those
730-
// matched case. With vertical sharding enabled it is easy to log too much.
731-
if numSeries > 0 {
729+
// Use number of blocks queried to check whether we should log the query
730+
// or not. It might be logging too much but good to understand per request
731+
// performance.
732+
if seriesQueryStats.BlocksQueried > 0 {
732733
level.Info(spanLog).Log("msg", "store gateway series request stats",
733734
"instance", c.RemoteAddress(),
734735
"queryable_chunk_bytes_fetched", chunkBytes,
@@ -753,6 +754,9 @@ func (q *blocksStoreQuerier) fetchSeriesFromStores(
753754
"chunks_fetch_count", seriesQueryStats.ChunksFetchCount,
754755
"chunks_fetched_size_sum", seriesQueryStats.ChunksFetchedSizeSum,
755756
"data_downloaded_size_sum", seriesQueryStats.DataDownloadedSizeSum,
757+
"get_all_duration", seriesQueryStats.GetAllDuration,
758+
"merge_duration", seriesQueryStats.MergeDuration,
759+
"response_time", time.Since(begin),
756760
)
757761
}
758762

pkg/storegateway/bucket_store_metrics.go

Lines changed: 17 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,9 +38,11 @@ type BucketStoreMetrics struct {
3838
cachedPostingsOriginalSizeBytes *prometheus.Desc
3939
cachedPostingsCompressedSizeBytes *prometheus.Desc
4040

41-
seriesFetchDuration *prometheus.Desc
42-
postingsFetchDuration *prometheus.Desc
43-
chunkFetchDuration *prometheus.Desc
41+
seriesFetchDuration *prometheus.Desc
42+
seriesFetchDurationSum *prometheus.Desc
43+
postingsFetchDuration *prometheus.Desc
44+
chunkFetchDuration *prometheus.Desc
45+
chunkFetchDurationSum *prometheus.Desc
4446

4547
lazyExpandedPostingsCount *prometheus.Desc
4648
lazyExpandedPostingSizeBytes *prometheus.Desc
@@ -160,6 +162,10 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
160162
"cortex_bucket_store_series_fetch_duration_seconds",
161163
"Time it takes to fetch series to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses.",
162164
nil, nil),
165+
seriesFetchDurationSum: prometheus.NewDesc(
166+
"cortex_bucket_store_series_fetch_duration_sum_seconds",
167+
"The time it takes to fetch postings to respond to a request sent to a store gateway. It includes both the time to fetch it from the cache and from storage in case of cache misses.",
168+
nil, nil),
163169
postingsFetchDuration: prometheus.NewDesc(
164170
"cortex_bucket_store_postings_fetch_duration_seconds",
165171
"Time it takes to fetch postings to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses.",
@@ -168,6 +174,10 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
168174
"cortex_bucket_store_chunks_fetch_duration_seconds",
169175
"The total time spent fetching chunks within a single request a store gateway.",
170176
nil, nil),
177+
chunkFetchDurationSum: prometheus.NewDesc(
178+
"cortex_bucket_store_chunks_fetch_duration_sum_seconds",
179+
"The total absolute time spent fetching chunks within a single request for one block.",
180+
nil, nil),
171181

172182
indexHeaderLazyLoadCount: prometheus.NewDesc(
173183
"cortex_bucket_store_indexheader_lazy_load_total",
@@ -241,8 +251,10 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) {
241251
out <- m.cachedPostingsCompressedSizeBytes
242252

243253
out <- m.seriesFetchDuration
254+
out <- m.seriesFetchDurationSum
244255
out <- m.postingsFetchDuration
245256
out <- m.chunkFetchDuration
257+
out <- m.chunkFetchDurationSum
246258

247259
out <- m.indexHeaderLazyLoadCount
248260
out <- m.indexHeaderLazyLoadFailedCount
@@ -288,8 +300,10 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) {
288300
data.SendSumOfCountersWithLabels(out, m.cachedPostingsCompressedSizeBytes, "thanos_bucket_store_cached_postings_compressed_size_bytes_total")
289301

290302
data.SendSumOfHistograms(out, m.seriesFetchDuration, "thanos_bucket_store_series_fetch_duration_seconds")
303+
data.SendSumOfHistograms(out, m.seriesFetchDurationSum, "thanos_bucket_store_series_fetch_duration_sum_seconds")
291304
data.SendSumOfHistograms(out, m.postingsFetchDuration, "thanos_bucket_store_postings_fetch_duration_seconds")
292305
data.SendSumOfHistograms(out, m.chunkFetchDuration, "thanos_bucket_store_chunks_fetch_duration_seconds")
306+
data.SendSumOfHistograms(out, m.chunkFetchDurationSum, "thanos_bucket_store_chunks_fetch_duration_sum_seconds")
293307

294308
data.SendSumOfCounters(out, m.indexHeaderLazyLoadCount, "thanos_bucket_store_indexheader_lazy_load_total")
295309
data.SendSumOfCounters(out, m.indexHeaderLazyLoadFailedCount, "thanos_bucket_store_indexheader_lazy_load_failed_total")

0 commit comments

Comments
 (0)