Skip to content

Commit 8d8975c

Browse files
authored
Query-tee: added a small tolerance to floating point sample values comparison (#2994)
* Query-tee: added a small tolerance to floating point sample values comparison Signed-off-by: Marco Pracucci <[email protected]> * Fixed test Signed-off-by: Marco Pracucci <[email protected]> * Fixed NaN comparison Signed-off-by: Marco Pracucci <[email protected]>
1 parent d9f9ada commit 8d8975c

File tree

7 files changed

+112
-38
lines changed

7 files changed

+112
-38
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
## master / unreleased
44

5+
* [ENHANCEMENT] Query-tee: added a small tolerance to floating point sample values comparison. #2994
56

67
## 1.3.0 in progress
78

cmd/query-tee/main.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ func main() {
4444
}
4545

4646
// Run the proxy.
47-
proxy, err := querytee.NewProxy(cfg.ProxyConfig, util.Logger, cortexReadRoutes(cfg.PathPrefix), registry)
47+
proxy, err := querytee.NewProxy(cfg.ProxyConfig, util.Logger, cortexReadRoutes(cfg), registry)
4848
if err != nil {
4949
level.Error(util.Logger).Log("msg", "Unable to initialize the proxy", "err", err.Error())
5050
os.Exit(1)
@@ -58,13 +58,15 @@ func main() {
5858
proxy.Await()
5959
}
6060

61-
func cortexReadRoutes(prefix string) []querytee.Route {
61+
func cortexReadRoutes(cfg Config) []querytee.Route {
62+
prefix := cfg.PathPrefix
63+
6264
// Strip trailing slashes.
6365
for len(prefix) > 0 && prefix[len(prefix)-1] == '/' {
6466
prefix = prefix[:len(prefix)-1]
6567
}
6668

67-
samplesComparator := querytee.NewSamplesComparator()
69+
samplesComparator := querytee.NewSamplesComparator(cfg.ProxyConfig.ValueComparisonTolerance)
6870
return []querytee.Route{
6971
{Path: prefix + "/api/v1/query", RouteName: "api_v1_query", Methods: []string{"GET"}, ResponseComparator: samplesComparator},
7072
{Path: prefix + "/api/v1/query_range", RouteName: "api_v1_query_range", Methods: []string{"GET"}, ResponseComparator: samplesComparator},

cmd/query-tee/main_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ import (
88
)
99

1010
func TestCortexReadRoutes(t *testing.T) {
11-
routes := cortexReadRoutes("")
11+
routes := cortexReadRoutes(Config{PathPrefix: ""})
1212
for _, r := range routes {
1313
assert.True(t, strings.HasPrefix(r.Path, "/api/v1/"))
1414
}
1515

16-
routes = cortexReadRoutes("/some/random/prefix///")
16+
routes = cortexReadRoutes(Config{PathPrefix: "/some/random/prefix///"})
1717
for _, r := range routes {
1818
assert.True(t, strings.HasPrefix(r.Path, "/some/random/prefix/api/v1/"))
1919
}

docs/operations/query-tee.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ _Note: from the `query-tee` perspective, a backend request is considered success
7272

7373
When the comparison is enabled, the `query-tee` compares the response received from the two configured backends and logs a message for each query whose results don't match, as well as keeps track of the number of successful and failed comparison through the metric `cortex_querytee_responses_compared_total`.
7474

75+
Floating point sample values are compared with a small tolerance that can be configured via `-proxy.value-comparison-tolerance`. This prevents false positives due to differences in floating point values _rounding_ introduced by the non deterministic series ordering within the Prometheus PromQL engine.
76+
7577
### Slow backends
7678

7779
`query-tee` sends back to the client the first viable response as soon as available, without waiting to receive a response from all backends.

tools/querytee/proxy.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,12 @@ var (
2424
)
2525

2626
type ProxyConfig struct {
27-
ServerServicePort int
28-
BackendEndpoints string
29-
PreferredBackend string
30-
BackendReadTimeout time.Duration
31-
CompareResponses bool
27+
ServerServicePort int
28+
BackendEndpoints string
29+
PreferredBackend string
30+
BackendReadTimeout time.Duration
31+
CompareResponses bool
32+
ValueComparisonTolerance float64
3233
}
3334

3435
func (cfg *ProxyConfig) RegisterFlags(f *flag.FlagSet) {
@@ -37,6 +38,7 @@ func (cfg *ProxyConfig) RegisterFlags(f *flag.FlagSet) {
3738
f.StringVar(&cfg.PreferredBackend, "backend.preferred", "", "The hostname of the preferred backend when selecting the response to send back to the client. If no preferred backend is configured then the query-tee will send back to the client the first successful response received without waiting for other backends.")
3839
f.DurationVar(&cfg.BackendReadTimeout, "backend.read-timeout", 90*time.Second, "The timeout when reading the response from a backend.")
3940
f.BoolVar(&cfg.CompareResponses, "proxy.compare-responses", false, "Compare responses between preferred and secondary endpoints for supported routes.")
41+
f.Float64Var(&cfg.ValueComparisonTolerance, "proxy.value-comparison-tolerance", 0.000001, "The tolerance to apply when comparing floating point values in the responses. 0 to disable tolerance and require exact match (not recommended).")
4042
}
4143

4244
type Route struct {

tools/querytee/response_comparator.go

Lines changed: 36 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package querytee
33
import (
44
"encoding/json"
55
"fmt"
6+
"math"
67

78
"github.com/go-kit/kit/log/level"
89
"github.com/pkg/errors"
@@ -12,7 +13,7 @@ import (
1213
)
1314

1415
// SamplesComparatorFunc helps with comparing different types of samples coming from /api/v1/query and /api/v1/query_range routes.
15-
type SamplesComparatorFunc func(expected, actual json.RawMessage) error
16+
type SamplesComparatorFunc func(expected, actual json.RawMessage, tolerance float64) error
1617

1718
type SamplesResponse struct {
1819
Status string
@@ -22,15 +23,19 @@ type SamplesResponse struct {
2223
}
2324
}
2425

25-
func NewSamplesComparator() *SamplesComparator {
26-
return &SamplesComparator{map[string]SamplesComparatorFunc{
27-
"matrix": compareMatrix,
28-
"vector": compareVector,
29-
"scalar": compareScalar,
30-
}}
26+
func NewSamplesComparator(tolerance float64) *SamplesComparator {
27+
return &SamplesComparator{
28+
tolerance: tolerance,
29+
sampleTypesComparator: map[string]SamplesComparatorFunc{
30+
"matrix": compareMatrix,
31+
"vector": compareVector,
32+
"scalar": compareScalar,
33+
},
34+
}
3135
}
3236

3337
type SamplesComparator struct {
38+
tolerance float64
3439
sampleTypesComparator map[string]SamplesComparatorFunc
3540
}
3641

@@ -44,12 +49,12 @@ func (s *SamplesComparator) Compare(expectedResponse, actualResponse []byte) err
4449

4550
err := json.Unmarshal(expectedResponse, &expected)
4651
if err != nil {
47-
return err
52+
return errors.Wrap(err, "unable to unmarshal expected response")
4853
}
4954

5055
err = json.Unmarshal(actualResponse, &actual)
5156
if err != nil {
52-
return err
57+
return errors.Wrap(err, "unable to unmarshal actual response")
5358
}
5459

5560
if expected.Status != actual.Status {
@@ -65,10 +70,10 @@ func (s *SamplesComparator) Compare(expectedResponse, actualResponse []byte) err
6570
return fmt.Errorf("resultType %s not registered for comparison", expected.Data.ResultType)
6671
}
6772

68-
return comparator(expected.Data.Result, actual.Data.Result)
73+
return comparator(expected.Data.Result, actual.Data.Result, s.tolerance)
6974
}
7075

71-
func compareMatrix(expectedRaw, actualRaw json.RawMessage) error {
76+
func compareMatrix(expectedRaw, actualRaw json.RawMessage, tolerance float64) error {
7277
var expected, actual model.Matrix
7378

7479
err := json.Unmarshal(expectedRaw, &expected)
@@ -113,7 +118,7 @@ func compareMatrix(expectedRaw, actualRaw json.RawMessage) error {
113118

114119
for i, expectedSamplePair := range expectedMetric.Values {
115120
actualSamplePair := actualMetric.Values[i]
116-
err := compareSamplePair(expectedSamplePair, actualSamplePair)
121+
err := compareSamplePair(expectedSamplePair, actualSamplePair, tolerance)
117122
if err != nil {
118123
return errors.Wrapf(err, "sample pair not matching for metric %s", expectedMetric.Metric)
119124
}
@@ -123,7 +128,7 @@ func compareMatrix(expectedRaw, actualRaw json.RawMessage) error {
123128
return nil
124129
}
125130

126-
func compareVector(expectedRaw, actualRaw json.RawMessage) error {
131+
func compareVector(expectedRaw, actualRaw json.RawMessage, tolerance float64) error {
127132
var expected, actual model.Vector
128133

129134
err := json.Unmarshal(expectedRaw, &expected)
@@ -159,7 +164,7 @@ func compareVector(expectedRaw, actualRaw json.RawMessage) error {
159164
}, model.SamplePair{
160165
Timestamp: actualMetric.Timestamp,
161166
Value: actualMetric.Value,
162-
})
167+
}, tolerance)
163168
if err != nil {
164169
return errors.Wrapf(err, "sample pair not matching for metric %s", expectedMetric.Metric)
165170
}
@@ -168,7 +173,7 @@ func compareVector(expectedRaw, actualRaw json.RawMessage) error {
168173
return nil
169174
}
170175

171-
func compareScalar(expectedRaw, actualRaw json.RawMessage) error {
176+
func compareScalar(expectedRaw, actualRaw json.RawMessage, tolerance float64) error {
172177
var expected, actual model.Scalar
173178
err := json.Unmarshal(expectedRaw, &expected)
174179
if err != nil {
@@ -186,16 +191,29 @@ func compareScalar(expectedRaw, actualRaw json.RawMessage) error {
186191
}, model.SamplePair{
187192
Timestamp: actual.Timestamp,
188193
Value: actual.Value,
189-
})
194+
}, tolerance)
190195
}
191196

192-
func compareSamplePair(expected, actual model.SamplePair) error {
197+
func compareSamplePair(expected, actual model.SamplePair, tolerance float64) error {
193198
if expected.Timestamp != actual.Timestamp {
194199
return fmt.Errorf("expected timestamp %v but got %v", expected.Timestamp, actual.Timestamp)
195200
}
196-
if expected.Value != actual.Value {
201+
if !compareSampleValue(expected.Value, actual.Value, tolerance) {
197202
return fmt.Errorf("expected value %s for timestamp %v but got %s", expected.Value, expected.Timestamp, actual.Value)
198203
}
199204

200205
return nil
201206
}
207+
208+
func compareSampleValue(first, second model.SampleValue, tolerance float64) bool {
209+
f := float64(first)
210+
s := float64(second)
211+
212+
if math.IsNaN(f) && math.IsNaN(s) {
213+
return true
214+
} else if tolerance <= 0 {
215+
return math.Float64bits(f) == math.Float64bits(s)
216+
}
217+
218+
return math.Abs(f-s) <= tolerance
219+
}

tools/querytee/response_comparator_test.go

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@ func TestCompareMatrix(t *testing.T) {
9191
},
9292
} {
9393
t.Run(tc.name, func(t *testing.T) {
94-
err := compareMatrix(tc.expected, tc.actual)
94+
err := compareMatrix(tc.expected, tc.actual, 0)
9595
if tc.err == nil {
9696
require.NoError(t, err)
9797
return
@@ -174,7 +174,7 @@ func TestCompareVector(t *testing.T) {
174174
},
175175
} {
176176
t.Run(tc.name, func(t *testing.T) {
177-
err := compareVector(tc.expected, tc.actual)
177+
err := compareVector(tc.expected, tc.actual, 0)
178178
if tc.err == nil {
179179
require.NoError(t, err)
180180
return
@@ -211,7 +211,7 @@ func TestCompareScalar(t *testing.T) {
211211
},
212212
} {
213213
t.Run(tc.name, func(t *testing.T) {
214-
err := compareScalar(tc.expected, tc.actual)
214+
err := compareScalar(tc.expected, tc.actual, 0)
215215
if tc.err == nil {
216216
require.NoError(t, err)
217217
return
@@ -223,13 +223,12 @@ func TestCompareScalar(t *testing.T) {
223223
}
224224

225225
func TestCompareSamplesResponse(t *testing.T) {
226-
samplesComparator := NewSamplesComparator()
227-
228226
for _, tc := range []struct {
229-
name string
230-
expected json.RawMessage
231-
actual json.RawMessage
232-
err error
227+
name string
228+
tolerance float64
229+
expected json.RawMessage
230+
actual json.RawMessage
231+
err error
233232
}{
234233
{
235234
name: "difference in response status",
@@ -250,7 +249,7 @@ func TestCompareSamplesResponse(t *testing.T) {
250249
}`),
251250
actual: json.RawMessage(`{
252251
"status": "success",
253-
"data": {"resultType":"vector","result":{"metric":{"foo":"bar"},"value":[1,"1"]}}
252+
"data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[1,"1"]}]}
254253
}`),
255254
err: errors.New("expected resultType scalar but got vector"),
256255
},
@@ -277,8 +276,58 @@ func TestCompareSamplesResponse(t *testing.T) {
277276
"data": {"resultType":"scalar","result":[1,"1"]}
278277
}`),
279278
},
279+
{
280+
name: "should pass if values are slightly different but within the tolerance",
281+
tolerance: 0.000001,
282+
expected: json.RawMessage(`{
283+
"status": "success",
284+
"data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[1,"773054.5916666666"]}]}
285+
}`),
286+
actual: json.RawMessage(`{
287+
"status": "success",
288+
"data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[1,"773054.59166667"]}]}
289+
}`),
290+
},
291+
{
292+
name: "should correctly compare NaN values with tolerance is disabled",
293+
tolerance: 0,
294+
expected: json.RawMessage(`{
295+
"status": "success",
296+
"data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[1,"NaN"]}]}
297+
}`),
298+
actual: json.RawMessage(`{
299+
"status": "success",
300+
"data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[1,"NaN"]}]}
301+
}`),
302+
},
303+
{
304+
name: "should correctly compare NaN values with tolerance is enabled",
305+
tolerance: 0.000001,
306+
expected: json.RawMessage(`{
307+
"status": "success",
308+
"data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[1,"NaN"]}]}
309+
}`),
310+
actual: json.RawMessage(`{
311+
"status": "success",
312+
"data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[1,"NaN"]}]}
313+
}`),
314+
},
315+
{
316+
name: "should fail if values are significantly different, over the tolerance",
317+
tolerance: 0.000001,
318+
expected: json.RawMessage(`{
319+
"status": "success",
320+
"data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[1,"773054.5916666666"]}]}
321+
}`),
322+
actual: json.RawMessage(`{
323+
"status": "success",
324+
"data": {"resultType":"vector","result":[{"metric":{"foo":"bar"},"value":[1,"773054.789"]}]}
325+
}`),
326+
err: errors.New(`sample pair not matching for metric {foo="bar"}: expected value 773054.5916666666 for timestamp 1 but got 773054.789`),
327+
},
280328
} {
281329
t.Run(tc.name, func(t *testing.T) {
330+
samplesComparator := NewSamplesComparator(tc.tolerance)
282331
err := samplesComparator.Compare(tc.expected, tc.actual)
283332
if tc.err == nil {
284333
require.NoError(t, err)

0 commit comments

Comments
 (0)