diff --git a/CHANGELOG.md b/CHANGELOG.md index 638e41727f4..39e2dcdc341 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * `cortex_bucket_stores_gate_queries_concurrent_max` * `cortex_bucket_stores_gate_queries_in_flight` * `cortex_bucket_stores_gate_duration_seconds` +* [CHANGE] Metric `cortex_ingester_flush_reasons` has been renamed to `cortex_ingester_series_flushed_total`, and is now incremented during the flush itself, not when the series is enqueued for flushing. #2802 * [FEATURE] Introduced `ruler.for-outage-tolerance`, Max time to tolerate outage for restoring "for" state of alert. #2783 * [FEATURE] Introduced `ruler.for-grace-period`, Minimum duration between alert and restored "for" state. This is maintained only for alerts with configured "for" time greater than grace period. #2783 * [FEATURE] Introduced `ruler.resend-delay`, Minimum amount of time to wait before resending an alert to Alertmanager. #2783 diff --git a/pkg/ingester/flush.go b/pkg/ingester/flush.go index 7dee669454b..5d4a887dea1 100644 --- a/pkg/ingester/flush.go +++ b/pkg/ingester/flush.go @@ -135,7 +135,6 @@ func (i *Ingester) sweepSeries(userID string, fp model.Fingerprint, series *memo flushQueueIndex := int(uint64(fp) % uint64(i.cfg.ConcurrentFlushes)) if i.flushQueues[flushQueueIndex].Enqueue(&flushOp{firstTime, userID, fp, immediate}) { - i.metrics.flushReasons.WithLabelValues(flush.String()).Inc() util.Event().Log("msg", "add to flush queue", "userID", userID, "reason", flush, "firstTime", firstTime, "fp", fp, "series", series.metric, "nlabels", len(series.metric), "queue", flushQueueIndex) } } @@ -279,6 +278,8 @@ func (i *Ingester) flushUserSeries(flushQueueIndex int, userID string, fp model. return nil } + i.metrics.flushedSeries.WithLabelValues(reason.String()).Inc() + // flush the chunks without locking the series, as we don't want to hold the series lock for the duration of the dynamo/s3 rpcs. 
ctx, cancel := context.WithTimeout(context.Background(), i.cfg.FlushOpTimeout) defer cancel() // releases resources if slowOperation completes before timeout elapses diff --git a/pkg/ingester/metrics.go b/pkg/ingester/metrics.go index 91adcaed3b1..52ff53ad3cd 100644 --- a/pkg/ingester/metrics.go +++ b/pkg/ingester/metrics.go @@ -52,7 +52,7 @@ type ingesterMetrics struct { chunkSize prometheus.Histogram chunkAge prometheus.Histogram memoryChunks prometheus.Gauge - flushReasons *prometheus.CounterVec + flushedSeries *prometheus.CounterVec droppedChunks prometheus.Counter oldestUnflushedChunkTimestamp prometheus.Gauge } @@ -187,9 +187,9 @@ func newIngesterMetrics(r prometheus.Registerer, createMetricsConflictingWithTSD Name: "cortex_ingester_memory_chunks", Help: "The total number of chunks in memory.", }), - flushReasons: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_ingester_flush_reasons", - Help: "Total number of series scheduled for flushing, with reasons.", + flushedSeries: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_ingester_series_flushed_total", + Help: "Total number of flushed series, with reasons.", }, []string{"reason"}), droppedChunks: promauto.With(r).NewCounter(prometheus.CounterOpts{ Name: "cortex_ingester_dropped_chunks_total",