Skip to content

Commit b51bf81

Browse files
track oldest delete request age since their cancellation period is over (#2806)
* track oldest delete request age since their cancellation period is over Signed-off-by: Sandeep Sukhani <[email protected]> * update metric for oldest pending request and number of pending requests to 0 sooner Signed-off-by: Sandeep Sukhani <[email protected]> * changes suggested from PR review Signed-off-by: Sandeep Sukhani <[email protected]> * update changelog Signed-off-by: Sandeep Sukhani <[email protected]> * minor nits suggested in PR review Signed-off-by: Sandeep Sukhani <[email protected]> * Fixed flaky test Signed-off-by: Marco Pracucci <[email protected]> Co-authored-by: Marco Pracucci <[email protected]>
1 parent 92622a8 commit b51bf81

File tree

4 files changed

+18
-12
lines changed

4 files changed

+18
-12
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
* `cortex_bucket_stores_gate_queries_in_flight`
99
* `cortex_bucket_stores_gate_duration_seconds`
1010
* [CHANGE] Metric `cortex_ingester_flush_reasons` has been renamed to `cortex_ingester_series_flushed_total`, and is now incremented during flush, not when series is enqueued for flushing. #2802
11+
* [CHANGE] Experimental Delete Series: Metric `cortex_purger_oldest_pending_delete_request_age_seconds` would track age of delete requests since they are over their cancellation period instead of their creation time. #2806
1112
* [FEATURE] Introduced `ruler.for-outage-tolerance`, Max time to tolerate outage for restoring "for" state of alert. #2783
1213
* [FEATURE] Introduced `ruler.for-grace-period`, Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. #2783
1314
* [FEATURE] Introduced `ruler.resend-delay`, Minimum amount of time to wait before resending an alert to Alertmanager. #2783

pkg/chunk/purger/purger.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -66,12 +66,12 @@ func newPurgerMetrics(r prometheus.Registerer) *purgerMetrics {
6666
m.oldestPendingDeleteRequestAgeSeconds = promauto.With(r).NewGauge(prometheus.GaugeOpts{
6767
Namespace: "cortex",
6868
Name: "purger_oldest_pending_delete_request_age_seconds",
69-
Help: "Age of oldest pending delete request in seconds",
69+
Help: "Age of oldest pending delete request in seconds, since they are over their cancellation period",
7070
})
7171
m.pendingDeleteRequestsCount = promauto.With(r).NewGauge(prometheus.GaugeOpts{
7272
Namespace: "cortex",
7373
Name: "purger_pending_delete_requests_count",
74-
Help: "Count of requests which are in process or are ready to be processed",
74+
Help: "Count of delete requests which are over their cancellation period and have not finished processing yet",
7575
})
7676

7777
return &m
@@ -248,6 +248,10 @@ func (p *Purger) workerJobCleanup(job workerJob) {
248248
default:
249249
// already sent
250250
}
251+
} else if len(p.usersWithPendingRequests) == 0 {
252+
// there are no pending requests from any of the users, set the oldest pending request and number of pending requests to 0
253+
p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(0)
254+
p.metrics.pendingDeleteRequestsCount.Set(0)
251255
}
252256
} else {
253257
p.pendingPlansCountMtx.Unlock()
@@ -409,7 +413,7 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error {
409413
p.inProcessRequestIDsMtx.RUnlock()
410414

411415
now := model.Now()
412-
oldestPendingRequestCreatedAt := now
416+
oldestPendingRequestCreatedAt := model.Time(0)
413417

414418
// requests which are still being processed are also considered pending
415419
if pendingDeleteRequestsCount != 0 {
@@ -426,7 +430,7 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error {
426430
}
427431

428432
pendingDeleteRequestsCount++
429-
if deleteRequest.CreatedAt.Before(oldestPendingRequestCreatedAt) {
433+
if oldestPendingRequestCreatedAt == 0 || deleteRequest.CreatedAt.Before(oldestPendingRequestCreatedAt) {
430434
oldestPendingRequestCreatedAt = deleteRequest.CreatedAt
431435
}
432436

@@ -473,7 +477,12 @@ func (p *Purger) pullDeleteRequestsToPlanDeletes() error {
473477
p.executePlansChan <- req
474478
}
475479

476-
p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(float64(now.Sub(oldestPendingRequestCreatedAt) / time.Second))
480+
// track age of oldest delete request since they are over their cancellation period
481+
oldestPendingRequestAge := time.Duration(0)
482+
if oldestPendingRequestCreatedAt != 0 {
483+
oldestPendingRequestAge = now.Sub(oldestPendingRequestCreatedAt.Add(p.cfg.DeleteRequestCancelPeriod))
484+
}
485+
p.metrics.oldestPendingDeleteRequestAgeSeconds.Set(float64(oldestPendingRequestAge / time.Second))
477486
p.metrics.pendingDeleteRequestsCount.Set(float64(pendingDeleteRequestsCount))
478487

479488
return nil

pkg/chunk/purger/purger_test.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -410,8 +410,8 @@ func TestPurger_Metrics(t *testing.T) {
410410
// load new delete requests for processing
411411
require.NoError(t, purger.pullDeleteRequestsToPlanDeletes())
412412

413-
// there must be 2 pending delete requests, oldest being 3 days old
414-
require.InDelta(t, float64(3*86400), testutil.ToFloat64(purger.metrics.oldestPendingDeleteRequestAgeSeconds), 1)
413+
// there must be 2 pending delete requests, oldest being 2 days old since its cancellation time is over
414+
require.InDelta(t, float64(2*86400), testutil.ToFloat64(purger.metrics.oldestPendingDeleteRequestAgeSeconds), 1)
415415
require.Equal(t, float64(2), testutil.ToFloat64(purger.metrics.pendingDeleteRequestsCount))
416416

417417
// start loop to process requests
@@ -429,9 +429,6 @@ func TestPurger_Metrics(t *testing.T) {
429429
return testutil.ToFloat64(purger.metrics.deleteRequestsProcessedTotal)
430430
})
431431

432-
// load new delete requests for processing which should update the metrics
433-
require.NoError(t, purger.pullDeleteRequestsToPlanDeletes())
434-
435432
// there must be 0 pending delete requests so the age for oldest pending must be 0
436433
require.InDelta(t, float64(0), testutil.ToFloat64(purger.metrics.oldestPendingDeleteRequestAgeSeconds), 1)
437434
require.Equal(t, float64(0), testutil.ToFloat64(purger.metrics.pendingDeleteRequestsCount))

pkg/testexporter/correctness/runner_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ func TestMinQueryTime(t *testing.T) {
2727
}
2828

2929
for _, tt := range tests {
30-
//require.Equal(t, tt.expected, tc.MinQueryTime())
31-
assert.WithinDuration(t, tt.expected, calculateMinQueryTime(tt.durationQuerySince, tt.timeQueryStart), 5*time.Millisecond)
30+
assert.WithinDuration(t, tt.expected, calculateMinQueryTime(tt.durationQuerySince, tt.timeQueryStart), 50*time.Millisecond)
3231
}
3332
}

0 commit comments

Comments
 (0)