Skip to content

Commit 8c50a02

Browse files
committed
Metrics for discarded samples (soft errors)
Signed-off-by: Peter Štibraný <[email protected]>
1 parent 454712e commit 8c50a02

File tree

3 files changed

+34
-4
lines changed

3 files changed

+34
-4
lines changed

pkg/ingester/ingester_v2.go

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -314,6 +314,15 @@ func (i *Ingester) v2Push(ctx context.Context, req *client.WriteRequest) (*clien
314314
firstPartialErr = errors.Wrapf(err, "series=%s, timestamp=%v", client.FromLabelAdaptersToLabels(ts.Labels).String(), model.Time(s.TimestampMs).Time().Format(time.RFC3339Nano))
315315
}
316316

317+
switch cause {
318+
case tsdb.ErrOutOfBounds:
319+
validation.DiscardedSamples.WithLabelValues(sampleOutOfBounds, userID).Inc()
320+
case tsdb.ErrOutOfOrderSample:
321+
validation.DiscardedSamples.WithLabelValues(sampleOutOfOrder, userID).Inc()
322+
case tsdb.ErrAmendSample:
323+
validation.DiscardedSamples.WithLabelValues(newValueForTimestamp, userID).Inc()
324+
}
325+
317326
continue
318327
}
319328

pkg/ingester/ingester_v2_test.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ func TestIngester_v2Push(t *testing.T) {
4545
"cortex_ingester_memory_users",
4646
"cortex_ingester_memory_series_created_total",
4747
"cortex_ingester_memory_series_removed_total",
48+
"cortex_discarded_samples_total",
4849
}
4950
userID := "test"
5051

@@ -124,6 +125,9 @@ func TestIngester_v2Push(t *testing.T) {
124125
# HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user.
125126
# TYPE cortex_ingester_memory_series_removed_total counter
126127
cortex_ingester_memory_series_removed_total{user="test"} 0
128+
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
129+
# TYPE cortex_discarded_samples_total counter
130+
cortex_discarded_samples_total{reason="sample-out-of-order",user="test"} 1
127131
`,
128132
},
129133
"should soft fail on sample out of bound": {
@@ -160,6 +164,9 @@ func TestIngester_v2Push(t *testing.T) {
160164
# HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user.
161165
# TYPE cortex_ingester_memory_series_removed_total counter
162166
cortex_ingester_memory_series_removed_total{user="test"} 0
167+
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
168+
# TYPE cortex_discarded_samples_total counter
169+
cortex_discarded_samples_total{reason="out-of-bounds",user="test"} 1
163170
`,
164171
},
165172
"should soft fail on two different sample values at the same timestamp": {
@@ -196,6 +203,9 @@ func TestIngester_v2Push(t *testing.T) {
196203
# HELP cortex_ingester_memory_series_removed_total The total number of series that were removed per user.
197204
# TYPE cortex_ingester_memory_series_removed_total counter
198205
cortex_ingester_memory_series_removed_total{user="test"} 0
206+
# HELP cortex_discarded_samples_total The total number of samples that were discarded.
207+
# TYPE cortex_discarded_samples_total counter
208+
cortex_discarded_samples_total{reason="new-value-for-timestamp",user="test"} 1
199209
`,
200210
},
201211
}
@@ -204,6 +214,9 @@ func TestIngester_v2Push(t *testing.T) {
204214
t.Run(testName, func(t *testing.T) {
205215
registry := prometheus.NewRegistry()
206216

217+
registry.MustRegister(validation.DiscardedSamples)
218+
validation.DiscardedSamples.Reset()
219+
207220
// Create a mocked ingester
208221
cfg := defaultIngesterTestConfig()
209222
cfg.LifecyclerConfig.JoinAfter = 0

pkg/ingester/series.go

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,14 @@ import (
1313
"github.com/cortexproject/cortex/pkg/prom1/storage/metric"
1414
)
1515

16+
const (
17+
sampleOutOfOrder = "sample-out-of-order"
18+
newValueForTimestamp = "new-value-for-timestamp"
19+
sampleOutOfBounds = "out-of-bounds"
20+
duplicateSample = "duplicate-sample"
21+
duplicateTimestamp = "duplicate-timestamp"
22+
)
23+
1624
type memorySeries struct {
1725
metric labels.Labels
1826

@@ -51,19 +59,19 @@ func (s *memorySeries) add(v model.SamplePair) error {
5159
// If we don't know what the last sample value is, silently discard.
5260
// This will mask some errors but better than complaining when we don't really know.
5361
if !s.lastSampleValueSet {
54-
return makeNoReportError("duplicate-timestamp")
62+
return makeNoReportError(duplicateTimestamp)
5563
}
5664
// If both timestamp and sample value are the same as for the last append,
5765
// ignore as they are a common occurrence when using client-side timestamps
5866
// (e.g. Pushgateway or federation).
5967
if v.Value.Equal(s.lastSampleValue) {
60-
return makeNoReportError("duplicate-sample")
68+
return makeNoReportError(duplicateSample)
6169
}
62-
return makeMetricValidationError("new-value-for-timestamp", s.metric,
70+
return makeMetricValidationError(newValueForTimestamp, s.metric,
6371
fmt.Errorf("sample with repeated timestamp but different value; last value: %v, incoming value: %v", s.lastSampleValue, v.Value))
6472
}
6573
if v.Timestamp < s.lastTime {
66-
return makeMetricValidationError("sample-out-of-order", s.metric,
74+
return makeMetricValidationError(sampleOutOfOrder, s.metric,
6775
fmt.Errorf("sample timestamp out of order; last timestamp: %v, incoming timestamp: %v", s.lastTime, v.Timestamp))
6876
}
6977

0 commit comments

Comments
 (0)