Skip to content

Commit 409f065

Browse files
authored
Add protobuf codec for query range and instant query responses (#5527)
Co-authored-by: Ahmed Hassan <[email protected]>
1 parent 820c3bf commit 409f065

23 files changed

+2124
-258
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## master / unreleased
44

5+
* [FEATURE] Query Frontend/Querier: Add protobuf codec `-api.querier-default-codec` and the option to choose response compression type `-querier.response-compression`. #5527
56
* [CHANGE] Enable Compactor and Alertmanager in target all. #6204
67
* [FEATURE] Ruler: Experimental: Add `ruler.frontend-address` to allow query to query frontends instead of ingesters. #6151
78
* [FEATURE] Ruler: Minimize chances of missed rule group evaluations that can occur due to OOM kills, bad underlying nodes, or due to an unhealthy ruler that appears in the ring as healthy. This feature is enabled via `-ruler.enable-ha-evaluation` flag. #6129

docs/blocks-storage/querier.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,11 @@ querier:
126126
# CLI flag: -querier.per-step-stats-enabled
127127
[per_step_stats_enabled: <boolean> | default = false]
128128

129+
# Use compression for metrics query API or instant and range query APIs.
130+
# Supports 'gzip' and '' (disable compression)
131+
# CLI flag: -querier.response-compression
132+
[response_compression: <string> | default = "gzip"]
133+
129134
# The time after which a metric should be queried from storage and not just
130135
# ingesters. 0 means all queries are sent to store. When running the blocks
131136
# storage, if this option is enabled, the time range of the query sent to the

docs/configuration/config-file-reference.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,11 @@ api:
9595
# CLI flag: -api.build-info-enabled
9696
[build_info_enabled: <boolean> | default = false]
9797

98+
# Choose default codec for querier response serialization. Supports 'json' and
99+
# 'protobuf'.
100+
# CLI flag: -api.querier-default-codec
101+
[querier_default_codec: <string> | default = "json"]
102+
98103
# The server_config configures the HTTP and gRPC server of the launched
99104
# service(s).
100105
[server: <server_config>]
@@ -3718,6 +3723,11 @@ The `querier_config` configures the Cortex querier.
37183723
# CLI flag: -querier.per-step-stats-enabled
37193724
[per_step_stats_enabled: <boolean> | default = false]
37203725
3726+
# Use compression for metrics query API or instant and range query APIs.
3727+
# Supports 'gzip' and '' (disable compression)
3728+
# CLI flag: -querier.response-compression
3729+
[response_compression: <string> | default = "gzip"]
3730+
37213731
# The time after which a metric should be queried from storage and not just
37223732
# ingesters. 0 means all queries are sent to store. When running the blocks
37233733
# storage, if this option is enabled, the time range of the query sent to the

docs/configuration/v1-guarantees.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,4 @@ Currently experimental features are:
115115
- String interning for metrics labels
116116
- Enable string interning for metrics labels by setting `-ingester.labels-string-interning-enabled` on Ingester.
117117
- Query-frontend: query rejection (`-frontend.query-rejection.enabled`)
118+
- Querier: protobuf codec (`-api.querier-default-codec`)

integration/query_frontend_test.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,26 @@ func TestQueryFrontendWithVerticalShardingQueryScheduler(t *testing.T) {
203203
})
204204
}
205205

206+
func TestQueryFrontendProtobufCodec(t *testing.T) {
207+
runQueryFrontendTest(t, queryFrontendTestConfig{
208+
testMissingMetricName: false,
209+
querySchedulerEnabled: true,
210+
queryStatsEnabled: true,
211+
setup: func(t *testing.T, s *e2e.Scenario) (configFile string, flags map[string]string) {
212+
require.NoError(t, writeFileToSharedDir(s, cortexConfigFile, []byte(BlocksStorageConfig)))
213+
214+
minio := e2edb.NewMinio(9000, BlocksStorageFlags()["-blocks-storage.s3.bucket-name"])
215+
require.NoError(t, s.StartAndWaitReady(minio))
216+
217+
flags = mergeFlags(e2e.EmptyFlags(), map[string]string{
218+
"-api.querier-default-codec": "protobuf",
219+
"-querier.response-compression": "gzip",
220+
})
221+
return cortexConfigFile, flags
222+
},
223+
})
224+
}
225+
206226
func TestQueryFrontendRemoteRead(t *testing.T) {
207227
runQueryFrontendTest(t, queryFrontendTestConfig{
208228
remoteReadEnabled: true,

pkg/api/api.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"github.com/go-kit/log/level"
1313
"github.com/grafana/regexp"
1414
"github.com/klauspost/compress/gzhttp"
15+
"github.com/pkg/errors"
1516
"github.com/prometheus/prometheus/model/relabel"
1617
"github.com/prometheus/prometheus/storage"
1718
"github.com/prometheus/prometheus/util/httputil"
@@ -73,13 +74,20 @@ type Config struct {
7374
corsRegexString string `yaml:"cors_origin"`
7475

7576
buildInfoEnabled bool `yaml:"build_info_enabled"`
77+
78+
QuerierDefaultCodec string `yaml:"querier_default_codec"`
7679
}
7780

81+
var (
82+
errUnsupportedDefaultCodec = errors.New("unsupported default codec type. Supported types are 'json' and 'protobuf'")
83+
)
84+
7885
// RegisterFlags adds the flags required to config this to the given FlagSet.
7986
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
8087
f.BoolVar(&cfg.ResponseCompression, "api.response-compression-enabled", false, "Use GZIP compression for API responses. Some endpoints serve large YAML or JSON blobs which can benefit from compression.")
8188
f.Var(&cfg.HTTPRequestHeadersToLog, "api.http-request-headers-to-log", "Which HTTP Request headers to add to logs")
8289
f.BoolVar(&cfg.buildInfoEnabled, "api.build-info-enabled", false, "If enabled, build Info API will be served by query frontend or querier.")
90+
f.StringVar(&cfg.QuerierDefaultCodec, "api.querier-default-codec", "json", "Choose default codec for querier response serialization. Supports 'json' and 'protobuf'.")
8391
cfg.RegisterFlagsWithPrefix("", f)
8492
}
8593

@@ -90,6 +98,14 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
9098
f.StringVar(&cfg.corsRegexString, prefix+"server.cors-origin", ".*", `Regex for CORS origin. It is fully anchored. Example: 'https?://(domain1|domain2)\.com'`)
9199
}
92100

101+
// validate config
102+
func (cfg *Config) Validate() error {
103+
if cfg.QuerierDefaultCodec != "json" && cfg.QuerierDefaultCodec != "protobuf" {
104+
return errUnsupportedDefaultCodec
105+
}
106+
return nil
107+
}
108+
93109
// Push either wraps the distributor push function as configured or returns the distributor push directly.
94110
func (cfg *Config) wrapDistributorPush(d *distributor.Distributor) push.Func {
95111
if cfg.DistributorPushWrapper != nil {

pkg/api/handlers.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/weaveworks/common/middleware"
2727

2828
"github.com/cortexproject/cortex/pkg/querier"
29+
"github.com/cortexproject/cortex/pkg/querier/codec"
2930
"github.com/cortexproject/cortex/pkg/querier/stats"
3031
"github.com/cortexproject/cortex/pkg/util"
3132
)
@@ -231,6 +232,9 @@ func NewQuerierHandler(
231232
false,
232233
)
233234

235+
// JSON codec is already installed. Install Protobuf codec to give the option for using either.
236+
api.InstallCodec(codec.ProtobufCodec{})
237+
234238
router := mux.NewRouter()
235239

236240
// Use a separate metric for the querier in order to differentiate requests from the query-frontend when

pkg/cortex/cortex.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,9 @@ func (c *Config) Validate(log log.Logger) error {
182182
return errInvalidHTTPPrefix
183183
}
184184

185+
if err := c.API.Validate(); err != nil {
186+
return errors.Wrap(err, "invalid api config")
187+
}
185188
if err := c.Storage.Validate(); err != nil {
186189
return errors.Wrap(err, "invalid storage config")
187190
}

pkg/cortex/modules.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -442,9 +442,9 @@ func (t *Cortex) initFlusher() (serv services.Service, err error) {
442442
func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err error) {
443443
queryAnalyzer := querysharding.NewQueryAnalyzer()
444444
// PrometheusCodec is a codec to encode and decode Prometheus query range requests and responses.
445-
prometheusCodec := queryrange.NewPrometheusCodec(false)
445+
prometheusCodec := queryrange.NewPrometheusCodec(false, t.Cfg.Querier.ResponseCompression, t.Cfg.API.QuerierDefaultCodec)
446446
// ShardedPrometheusCodec is same as PrometheusCodec but to be used on the sharded queries (it sum up the stats)
447-
shardedPrometheusCodec := queryrange.NewPrometheusCodec(true)
447+
shardedPrometheusCodec := queryrange.NewPrometheusCodec(true, t.Cfg.Querier.ResponseCompression, t.Cfg.API.QuerierDefaultCodec)
448448

449449
queryRangeMiddlewares, cache, err := queryrange.Middlewares(
450450
t.Cfg.QueryRange,
@@ -472,7 +472,7 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro
472472
queryRangeMiddlewares,
473473
instantQueryMiddlewares,
474474
prometheusCodec,
475-
instantquery.InstantQueryCodec,
475+
instantquery.NewInstantQueryCodec(t.Cfg.Querier.ResponseCompression, t.Cfg.API.QuerierDefaultCodec),
476476
t.Overrides,
477477
queryAnalyzer,
478478
t.Cfg.Querier.DefaultEvaluationInterval,

pkg/querier/codec/protobuf_codec.go

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,161 @@
1+
package codec
2+
3+
import (
4+
"github.com/gogo/protobuf/proto"
5+
jsoniter "github.com/json-iterator/go"
6+
"github.com/prometheus/common/model"
7+
"github.com/prometheus/prometheus/promql"
8+
"github.com/prometheus/prometheus/util/stats"
9+
v1 "github.com/prometheus/prometheus/web/api/v1"
10+
11+
"github.com/cortexproject/cortex/pkg/cortexpb"
12+
"github.com/cortexproject/cortex/pkg/querier/tripperware"
13+
)
14+
15+
type ProtobufCodec struct{}
16+
17+
func (p ProtobufCodec) ContentType() v1.MIMEType {
18+
return v1.MIMEType{Type: "application", SubType: "x-protobuf"}
19+
}
20+
21+
func (p ProtobufCodec) CanEncode(resp *v1.Response) bool {
22+
// Errors are parsed by default json codec
23+
if resp.Error != "" || resp.Data == nil {
24+
return false
25+
}
26+
return true
27+
}
28+
29+
func (p ProtobufCodec) Encode(resp *v1.Response) ([]byte, error) {
30+
prometheusQueryResponse, err := createPrometheusQueryResponse(resp)
31+
if err != nil {
32+
return []byte{}, err
33+
}
34+
b, err := proto.Marshal(prometheusQueryResponse)
35+
return b, err
36+
}
37+
38+
func createPrometheusQueryResponse(resp *v1.Response) (*tripperware.PrometheusResponse, error) {
39+
var data = resp.Data.(*v1.QueryData)
40+
41+
var queryResult tripperware.PrometheusQueryResult
42+
switch string(data.ResultType) {
43+
case model.ValMatrix.String():
44+
queryResult.Result = &tripperware.PrometheusQueryResult_Matrix{
45+
Matrix: &tripperware.Matrix{
46+
SampleStreams: *getMatrixSampleStreams(data),
47+
},
48+
}
49+
case model.ValVector.String():
50+
queryResult.Result = &tripperware.PrometheusQueryResult_Vector{
51+
Vector: &tripperware.Vector{
52+
Samples: *getVectorSamples(data),
53+
},
54+
}
55+
default:
56+
json := jsoniter.ConfigCompatibleWithStandardLibrary
57+
rawBytes, err := json.Marshal(data)
58+
if err != nil {
59+
return nil, err
60+
}
61+
queryResult.Result = &tripperware.PrometheusQueryResult_RawBytes{RawBytes: rawBytes}
62+
}
63+
64+
var stats *tripperware.PrometheusResponseStats
65+
if data.Stats != nil {
66+
builtin := data.Stats.Builtin()
67+
stats = &tripperware.PrometheusResponseStats{Samples: getStats(&builtin)}
68+
}
69+
70+
return &tripperware.PrometheusResponse{
71+
Status: string(resp.Status),
72+
Data: tripperware.PrometheusData{
73+
ResultType: string(data.ResultType),
74+
Result: queryResult,
75+
Stats: stats,
76+
},
77+
ErrorType: string(resp.ErrorType),
78+
Error: resp.Error,
79+
Warnings: resp.Warnings,
80+
}, nil
81+
}
82+
83+
func getMatrixSampleStreams(data *v1.QueryData) *[]tripperware.SampleStream {
84+
sampleStreamsLen := len(data.Result.(promql.Matrix))
85+
sampleStreams := make([]tripperware.SampleStream, sampleStreamsLen)
86+
87+
for i := 0; i < sampleStreamsLen; i++ {
88+
labelsLen := len(data.Result.(promql.Matrix)[i].Metric)
89+
var labels []cortexpb.LabelAdapter
90+
if labelsLen > 0 {
91+
labels = make([]cortexpb.LabelAdapter, labelsLen)
92+
for j := 0; j < labelsLen; j++ {
93+
labels[j] = cortexpb.LabelAdapter{
94+
Name: data.Result.(promql.Matrix)[i].Metric[j].Name,
95+
Value: data.Result.(promql.Matrix)[i].Metric[j].Value,
96+
}
97+
}
98+
}
99+
100+
samplesLen := len(data.Result.(promql.Matrix)[i].Floats)
101+
var samples []cortexpb.Sample
102+
if samplesLen > 0 {
103+
samples = make([]cortexpb.Sample, samplesLen)
104+
for j := 0; j < samplesLen; j++ {
105+
samples[j] = cortexpb.Sample{
106+
Value: data.Result.(promql.Matrix)[i].Floats[j].F,
107+
TimestampMs: data.Result.(promql.Matrix)[i].Floats[j].T,
108+
}
109+
}
110+
}
111+
sampleStreams[i] = tripperware.SampleStream{Labels: labels, Samples: samples}
112+
}
113+
return &sampleStreams
114+
}
115+
116+
func getVectorSamples(data *v1.QueryData) *[]tripperware.Sample {
117+
vectorSamplesLen := len(data.Result.(promql.Vector))
118+
vectorSamples := make([]tripperware.Sample, vectorSamplesLen)
119+
120+
for i := 0; i < vectorSamplesLen; i++ {
121+
labelsLen := len(data.Result.(promql.Vector)[i].Metric)
122+
var labels []cortexpb.LabelAdapter
123+
if labelsLen > 0 {
124+
labels = make([]cortexpb.LabelAdapter, labelsLen)
125+
for j := 0; j < labelsLen; j++ {
126+
labels[j] = cortexpb.LabelAdapter{
127+
Name: data.Result.(promql.Vector)[i].Metric[j].Name,
128+
Value: data.Result.(promql.Vector)[i].Metric[j].Value,
129+
}
130+
}
131+
}
132+
133+
vectorSamples[i] = tripperware.Sample{
134+
Labels: labels,
135+
Sample: &cortexpb.Sample{
136+
TimestampMs: data.Result.(promql.Vector)[i].T,
137+
Value: data.Result.(promql.Vector)[i].F,
138+
},
139+
}
140+
}
141+
return &vectorSamples
142+
}
143+
144+
func getStats(builtin *stats.BuiltinStats) *tripperware.PrometheusResponseSamplesStats {
145+
queryableSamplesStatsPerStepLen := len(builtin.Samples.TotalQueryableSamplesPerStep)
146+
queryableSamplesStatsPerStep := make([]*tripperware.PrometheusResponseQueryableSamplesStatsPerStep, queryableSamplesStatsPerStepLen)
147+
for i := 0; i < queryableSamplesStatsPerStepLen; i++ {
148+
queryableSamplesStatsPerStep[i] = &tripperware.PrometheusResponseQueryableSamplesStatsPerStep{
149+
Value: builtin.Samples.TotalQueryableSamplesPerStep[i].V,
150+
TimestampMs: builtin.Samples.TotalQueryableSamplesPerStep[i].T,
151+
}
152+
}
153+
154+
statSamples := tripperware.PrometheusResponseSamplesStats{
155+
TotalQueryableSamples: builtin.Samples.TotalQueryableSamples,
156+
TotalQueryableSamplesPerStep: queryableSamplesStatsPerStep,
157+
PeakSamples: int64(builtin.Samples.PeakSamples),
158+
}
159+
160+
return &statSamples
161+
}

pkg/querier/querier.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ type Config struct {
5050
QueryIngestersWithin time.Duration `yaml:"query_ingesters_within"`
5151
EnablePerStepStats bool `yaml:"per_step_stats_enabled"`
5252

53+
// Use compression for metrics query API or instant and range query APIs.
54+
ResponseCompression string `yaml:"response_compression"`
55+
5356
// QueryStoreAfter the time after which queries should also be sent to the store and not just ingesters.
5457
QueryStoreAfter time.Duration `yaml:"query_store_after"`
5558
MaxQueryIntoFuture time.Duration `yaml:"max_query_into_future"`
@@ -90,6 +93,7 @@ var (
9093
errBadLookbackConfigs = errors.New("bad settings, query_store_after >= query_ingesters_within which can result in queries not being sent")
9194
errShuffleShardingLookbackLessThanQueryStoreAfter = errors.New("the shuffle-sharding lookback period should be greater or equal than the configured 'query store after'")
9295
errEmptyTimeRange = errors.New("empty time range")
96+
errUnsupportedResponseCompression = errors.New("unsupported response compression. Supported compression 'gzip' and '' (disable compression)")
9397
)
9498

9599
// RegisterFlags adds the flags required to config this to the given FlagSet.
@@ -111,6 +115,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
111115
f.IntVar(&cfg.MaxSamples, "querier.max-samples", 50e6, "Maximum number of samples a single query can load into memory.")
112116
f.DurationVar(&cfg.QueryIngestersWithin, "querier.query-ingesters-within", 0, "Maximum lookback beyond which queries are not sent to ingester. 0 means all queries are sent to ingester.")
113117
f.BoolVar(&cfg.EnablePerStepStats, "querier.per-step-stats-enabled", false, "Enable returning samples stats per steps in query response.")
118+
f.StringVar(&cfg.ResponseCompression, "querier.response-compression", "gzip", "Use compression for metrics query API or instant and range query APIs. Supports 'gzip' and '' (disable compression)")
114119
f.DurationVar(&cfg.MaxQueryIntoFuture, "querier.max-query-into-future", 10*time.Minute, "Maximum duration into the future you can query. 0 to disable.")
115120
f.DurationVar(&cfg.DefaultEvaluationInterval, "querier.default-evaluation-interval", time.Minute, "The default evaluation interval or step size for subqueries.")
116121
f.DurationVar(&cfg.QueryStoreAfter, "querier.query-store-after", 0, "The time after which a metric should be queried from storage and not just ingesters. 0 means all queries are sent to store. When running the blocks storage, if this option is enabled, the time range of the query sent to the store will be manipulated to ensure the query end is not more recent than 'now - query-store-after'.")
@@ -133,6 +138,10 @@ func (cfg *Config) Validate() error {
133138
}
134139
}
135140

141+
if cfg.ResponseCompression != "" && cfg.ResponseCompression != "gzip" {
142+
return errUnsupportedResponseCompression
143+
}
144+
136145
if cfg.ShuffleShardingIngestersLookbackPeriod > 0 {
137146
if cfg.ShuffleShardingIngestersLookbackPeriod < cfg.QueryStoreAfter {
138147
return errShuffleShardingLookbackLessThanQueryStoreAfter

0 commit comments

Comments
 (0)