Skip to content

fix remote read error in query frontend #5257

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@

## master / unreleased
* [CHANGE] Store gateways summary metrics have been converted to histograms `cortex_bucket_store_series_blocks_queried`, `cortex_bucket_store_series_data_fetched`, `cortex_bucket_store_series_data_size_touched_bytes`, `cortex_bucket_store_series_data_size_fetched_bytes`, `cortex_bucket_store_series_data_touched`, `cortex_bucket_store_series_result_series` #5239

* [ENHANCEMENT] Querier: Batch Iterator optimization to prevent traversing it multiple times when query range steps do not overlap. #5237
* [BUGFIX] Catch context error in the s3 bucket client. #5240
* [BUGFIX] Fix query frontend remote read empty body. #5257

## 1.15.0 in progress

Expand Down
69 changes: 69 additions & 0 deletions integration/e2ecortex/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,11 @@ import (
promapi "github.com/prometheus/client_golang/api"
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
"github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/labels"
"github.com/prometheus/prometheus/model/rulefmt"
"github.com/prometheus/prometheus/prompb"
"github.com/prometheus/prometheus/storage"
"github.com/prometheus/prometheus/storage/remote"
yaml "gopkg.in/yaml.v3"

"github.com/cortexproject/cortex/pkg/ruler"
Expand Down Expand Up @@ -153,6 +156,72 @@ func (c *Client) QueryRaw(query string) (*http.Response, []byte, error) {
return c.query(addr)
}

// RemoteRead runs a remote read query against the querier's
// /prometheus/api/v1/read endpoint and returns the decoded response.
//
// The matchers select the series to read. start and end bound the queried time
// range and, together with step, are also passed as select hints (all converted
// to milliseconds). The request body is a snappy-compressed protobuf
// ReadRequest and the call is bounded by a 5 second timeout. The tenant header
// is hard-coded to "user-1".
//
// An error is returned when the request cannot be built or sent, when the
// response status is not 200 OK, or when the response body cannot be read,
// decompressed or unmarshalled.
func (c *Client) RemoteRead(matchers []*labels.Matcher, start, end time.Time, step time.Duration) (*prompb.ReadResponse, error) {
	startMs := start.UnixMilli()
	endMs := end.UnixMilli()
	stepMs := step.Milliseconds()

	q, err := remote.ToQuery(startMs, endMs, matchers, &storage.SelectHints{
		Step:  stepMs,
		Start: startMs,
		End:   endMs,
	})
	if err != nil {
		return nil, err
	}

	// NOTE(review): streamed XOR chunks are advertised as accepted here, yet the
	// response is decoded below as a single snappy block holding a ReadResponse.
	// Presumably this relies on the server answering in the sampled format —
	// confirm.
	req := &prompb.ReadRequest{
		Queries:               []*prompb.Query{q},
		AcceptedResponseTypes: []prompb.ReadRequest_ResponseType{prompb.ReadRequest_STREAMED_XOR_CHUNKS},
	}

	data, err := proto.Marshal(req)
	if err != nil {
		return nil, err
	}
	compressed := snappy.Encode(nil, data)

	// Call the remote read API endpoint with a timeout.
	httpReqCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	httpReq, err := http.NewRequestWithContext(httpReqCtx, "POST", "http://"+c.querierAddress+"/prometheus/api/v1/read", bytes.NewReader(compressed))
	if err != nil {
		return nil, err
	}
	httpReq.Header.Set("X-Scope-OrgID", "user-1")
	httpReq.Header.Add("Content-Encoding", "snappy")
	httpReq.Header.Add("Accept-Encoding", "snappy")
	httpReq.Header.Set("Content-Type", "application/x-protobuf")
	httpReq.Header.Set("User-Agent", "Prometheus/1.8.2")
	httpReq.Header.Set("X-Prometheus-Remote-Read-Version", "0.1.0")

	httpResp, err := c.httpClient.Do(httpReq)
	if err != nil {
		return nil, err
	}
	// Close the body on every return path below (including the non-200 one) so
	// the underlying connection is released instead of leaked.
	defer httpResp.Body.Close()

	if httpResp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status code %d", httpResp.StatusCode)
	}

	compressed, err = io.ReadAll(httpResp.Body)
	if err != nil {
		return nil, err
	}

	uncompressed, err := snappy.Decode(nil, compressed)
	if err != nil {
		return nil, err
	}

	var resp prompb.ReadResponse
	if err = proto.Unmarshal(uncompressed, &resp); err != nil {
		return nil, err
	}
	return &resp, nil
}

func (c *Client) query(addr string) (*http.Response, []byte, error) {
ctx, cancel := context.WithTimeout(context.Background(), c.timeout)
defer cancel()
Expand Down
30 changes: 30 additions & 0 deletions integration/query_frontend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ type queryFrontendTestConfig struct {
testMissingMetricName bool
querySchedulerEnabled bool
queryStatsEnabled bool
remoteReadEnabled bool
setup func(t *testing.T, s *e2e.Scenario) (configFile string, flags map[string]string)
}

Expand Down Expand Up @@ -194,6 +195,19 @@ func TestQueryFrontendWithVerticalShardingQueryScheduler(t *testing.T) {
})
}

func TestQueryFrontendRemoteRead(t *testing.T) {
runQueryFrontendTest(t, queryFrontendTestConfig{
remoteReadEnabled: true,
setup: func(t *testing.T, s *e2e.Scenario) (configFile string, flags map[string]string) {
require.NoError(t, writeFileToSharedDir(s, cortexConfigFile, []byte(BlocksStorageConfig)))

minio := e2edb.NewMinio(9000, BlocksStorageFlags()["-blocks-storage.s3.bucket-name"])
require.NoError(t, s.StartAndWaitReady(minio))
return cortexConfigFile, flags
},
})
}

func runQueryFrontendTest(t *testing.T, cfg queryFrontendTestConfig) {
const numUsers = 10
const numQueriesPerUser = 10
Expand Down Expand Up @@ -307,6 +321,18 @@ func runQueryFrontendTest(t *testing.T, cfg queryFrontendTestConfig) {
require.Regexp(t, "querier_wall_time;dur=[0-9.]*, response_time;dur=[0-9.]*$", res.Header.Values("Server-Timing")[0])
}

// No need to repeat the test on remote read for each user.
if userID == 0 && cfg.remoteReadEnabled {
start := now.Add(-1 * time.Hour)
end := now.Add(1 * time.Hour)
res, err := c.RemoteRead([]*labels.Matcher{labels.MustNewMatcher(labels.MatchEqual, labels.MetricName, "series_1")}, start, end, time.Second)
require.NoError(t, err)
require.True(t, len(res.Results) > 0)
require.True(t, len(res.Results[0].Timeseries) > 0)
require.True(t, len(res.Results[0].Timeseries[0].Samples) > 0)
require.True(t, len(res.Results[0].Timeseries[0].Labels) > 0)
}

// In this test we do ensure that the /series start/end time is ignored and Cortex
// always returns series in ingesters memory. No need to repeat it for each user.
if userID == 0 {
Expand Down Expand Up @@ -342,6 +368,10 @@ func runQueryFrontendTest(t *testing.T, cfg queryFrontendTestConfig) {
extra++
}

if cfg.remoteReadEnabled {
extra++
}

require.NoError(t, queryFrontend.WaitSumMetrics(e2e.Equals(numUsers*numQueriesPerUser+extra), "cortex_query_frontend_queries_total"))

// The number of received request is greater than the query requests because include
Expand Down
11 changes: 7 additions & 4 deletions pkg/frontend/transport/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,11 +139,14 @@ func (f *Handler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
r.Body = io.NopCloser(io.TeeReader(r.Body, &buf))
// We parse form here so that we can use buf as body, in order to
// prevent https://github.com/cortexproject/cortex/issues/5201.
if err := r.ParseForm(); err != nil {
writeError(w, err)
return
// Exclude remote read here as we don't have to buffer its body.
if !strings.Contains(r.URL.Path, "api/v1/read") {
if err := r.ParseForm(); err != nil {
writeError(w, err)
return
}
r.Body = io.NopCloser(&buf)
}
r.Body = io.NopCloser(&buf)

startTime := time.Now()
resp, err := f.roundTripper.RoundTrip(r)
Expand Down