
Commit 037d4b3

Author: Ganesh Vernekar (committed)

memoryChunks counter fix, metrics updated, small cleanup

Signed-off-by: Ganesh Vernekar <[email protected]>

1 parent: e618463 · commit: 037d4b3

File tree: 4 files changed, +26 -39 lines changed

pkg/ingester/ingester.go
pkg/ingester/series.go
pkg/ingester/wal.go
pkg/ring/lifecycler.go


pkg/ingester/ingester.go

Lines changed: 7 additions & 6 deletions
@@ -48,7 +48,7 @@ type ingesterMetrics struct {
 	queriedSamples    prometheus.Histogram
 	queriedSeries     prometheus.Histogram
 	queriedChunks     prometheus.Histogram
-	walReplayDuration prometheus.Summary
+	walReplayDuration prometheus.Gauge
 }
 
 func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics {
@@ -87,10 +87,9 @@ func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics {
 			// A small number of chunks per series - 10*(8^(7-1)) = 2.6m.
 			Buckets: prometheus.ExponentialBuckets(10, 8, 7),
 		}),
-		walReplayDuration: prometheus.NewSummary(prometheus.SummaryOpts{
-			Name:       "cortex_ingester_wal_replay_duration_seconds",
-			Help:       "Time taken to replay the checkpoint and the WAL.",
-			Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
+		walReplayDuration: prometheus.NewGauge(prometheus.GaugeOpts{
+			Name: "cortex_ingester_wal_replay_duration_seconds",
+			Help: "Time taken to replay the checkpoint and the WAL.",
 		}),
 	}
 
@@ -103,6 +102,7 @@ func newIngesterMetrics(r prometheus.Registerer) *ingesterMetrics {
 		m.queriedSamples,
 		m.queriedSeries,
 		m.queriedChunks,
+		m.walReplayDuration,
 	)
 }
 
@@ -247,11 +247,12 @@ func New(cfg Config, clientConfig client.Config, limits *validation.Overrides, c
 		level.Info(util.Logger).Log("msg", "recovering from WAL")
 		start := time.Now()
 		if err := recoverFromWAL(i); err != nil {
+			level.Error(util.Logger).Log("msg", "failed to recover from WAL", "time", time.Since(start).String())
 			return nil, err
 		}
 		elapsed := time.Since(start)
 		level.Info(util.Logger).Log("msg", "recovery from WAL completed", "time", elapsed.String())
-		i.metrics.walReplayDuration.Observe(elapsed.Seconds())
+		i.metrics.walReplayDuration.Set(elapsed.Seconds())
 	}
 
 	// If the WAL recover happened, then the userStates would already be set.
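Note on the metric type change above: WAL replay happens once at ingester startup, so a Gauge that is set a single time fits better than a Summary, which aggregates repeated observations into quantiles. A minimal, self-contained sketch of that pattern using the Prometheus Go client (the metric name and registration here are illustrative, not the ingester's actual wiring):

```go
package main

import (
	"fmt"
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// A gauge suits a one-shot duration such as WAL replay: it is set once
	// and simply exposes the last recorded value.
	replayDuration := prometheus.NewGauge(prometheus.GaugeOpts{
		Name: "example_wal_replay_duration_seconds",
		Help: "Time taken to replay the checkpoint and the WAL.",
	})
	prometheus.MustRegister(replayDuration)

	start := time.Now()
	// ... replay work would happen here ...
	time.Sleep(10 * time.Millisecond)
	replayDuration.Set(time.Since(start).Seconds())

	fmt.Println("replay took", time.Since(start))
}
```

The per-checkpoint duration added in pkg/ingester/wal.go below keeps a Summary, since checkpoints recur and quantiles are meaningful there.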

pkg/ingester/series.go

Lines changed: 0 additions & 2 deletions
@@ -204,8 +204,6 @@ func (s *memorySeries) setChunks(descs []*desc) error {
 	if len(descs) > 0 {
 		s.lastTime = descs[len(descs)-1].LastTime
 	}
-	memoryChunks.Add(float64(len(descs)))
-
 	return nil
 }

pkg/ingester/wal.go

Lines changed: 14 additions & 27 deletions
@@ -72,11 +72,10 @@ type walWrapper struct {
 	checkpointDeleteTotal   prometheus.Counter
 	checkpointCreationFail  prometheus.Counter
 	checkpointCreationTotal prometheus.Counter
+	checkpointDuration      prometheus.Summary
 }
 
-// newWAL creates a WAL object.
-// * If the WAL is disabled, then the returned WAL is a no-op WAL.
-// * If WAL recovery is enabled, then the userStates is always set for ingester.
+// newWAL creates a WAL object. If the WAL is disabled, then the returned WAL is a no-op WAL.
 func newWAL(cfg WALConfig, userStatesFunc func() map[string]*userState) (WAL, error) {
 	if !cfg.walEnabled {
 		return &noopWAL{}, nil
@@ -114,12 +113,18 @@ func newWAL(cfg WALConfig, userStatesFunc func() map[string]*userState) (WAL, er
 		Name: "ingester_checkpoint_creations_total",
 		Help: "Total number of checkpoint creations attempted.",
 	})
+	w.checkpointDuration = prometheus.NewSummary(prometheus.SummaryOpts{
+		Name:       "ingester_checkpoint_duration_seconds",
+		Help:       "Time taken to create a checkpoint.",
+		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
+	})
 	if cfg.metricsRegisterer != nil {
 		cfg.metricsRegisterer.MustRegister(
 			w.checkpointDeleteFail,
 			w.checkpointDeleteTotal,
 			w.checkpointCreationFail,
 			w.checkpointCreationTotal,
+			w.checkpointDuration,
 		)
 	}
 
@@ -131,7 +136,6 @@ func newWAL(cfg WALConfig, userStatesFunc func() map[string]*userState) (WAL, er
 func (w *walWrapper) Stop() {
 	close(w.quit)
 	w.wait.Wait()
-
 	w.wal.Close()
 }
 
@@ -161,7 +165,7 @@ func (w *walWrapper) run() {
 	ticker := time.NewTicker(w.cfg.checkpointDuration)
 	defer ticker.Stop()
 
-	for !w.isStopped() {
+	for {
 		select {
 		case <-ticker.C:
 			start := time.Now()
@@ -172,7 +176,9 @@ func (w *walWrapper) run() {
 			}
 			elapsed := time.Since(start)
 			level.Info(util.Logger).Log("msg", "checkpoint done", "time", elapsed.String())
+			w.checkpointDuration.Observe(elapsed.Seconds())
 		case <-w.quit:
+			level.Info(util.Logger).Log("msg", "creating checkpoint before shutdown")
 			if err := w.performCheckpoint(); err != nil {
 				level.Error(util.Logger).Log("msg", "error checkpointing series during shutdown", "err", err)
 			}
@@ -181,15 +187,6 @@
 	}
 }
 
-func (w *walWrapper) isStopped() bool {
-	select {
-	case <-w.quit:
-		return true
-	default:
-		return false
-	}
-}
-
 const checkpointPrefix = "checkpoint."
 
 func (w *walWrapper) performCheckpoint() (err error) {
@@ -511,11 +508,8 @@ Loop:
 	case capturedErr = <-errChan:
 		return capturedErr
 	default:
-		if err := reader.Err(); err != nil {
-			return err
-		}
+		return reader.Err()
 	}
-	return nil
 }
 
 func copyLabelAdapters(las []client.LabelAdapter) []client.LabelAdapter {
@@ -563,6 +557,7 @@ func processCheckpointRecord(userStates *userStates, seriesPool *sync.Pool, stat
 			errChan <- err
 			return
 		}
+		memoryChunks.Add(float64(len(descs)))
 
 		seriesCache[s.UserId][s.Fingerprint] = series
 		seriesPool.Put(s)
@@ -705,16 +700,8 @@ Loop:
 	case capturedErr = <-errChan:
 		return capturedErr
 	default:
-		if err := reader.Err(); err != nil {
-			return err
-		}
+		return reader.Err()
 	}
-
-	if err != nil {
-		return err
-	}
-
-	return nil
 }
 
 func processWALSamples(userStates *userStates, stateCache map[string]*userState, seriesCache map[string]map[uint64]*memorySeries,
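The checkpoint duration added above is recorded on every checkpoint, so it stays a Summary with explicit quantile objectives, observed from a ticker-driven loop that also checkpoints one last time on shutdown. A rough, self-contained sketch of that pattern, with hypothetical names (runCheckpoints, example_checkpoint_duration_seconds) standing in for the walWrapper internals:

```go
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

// runCheckpoints mirrors the ticker/quit pattern above: each tick performs a
// checkpoint and records its duration in a Summary.
func runCheckpoints(do func() error, quit <-chan struct{}) {
	checkpointDuration := prometheus.NewSummary(prometheus.SummaryOpts{
		Name:       "example_checkpoint_duration_seconds",
		Help:       "Time taken to create a checkpoint.",
		Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001},
	})
	prometheus.MustRegister(checkpointDuration)

	ticker := time.NewTicker(time.Minute)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			start := time.Now()
			if err := do(); err != nil {
				continue // a real implementation would log the error
			}
			checkpointDuration.Observe(time.Since(start).Seconds())
		case <-quit:
			// One final checkpoint before shutdown, matching the quit case above.
			_ = do()
			return
		}
	}
}

func main() {
	quit := make(chan struct{})
	go runCheckpoints(func() error { return nil }, quit)
	time.Sleep(100 * time.Millisecond)
	close(quit)
}
```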

pkg/ring/lifecycler.go

Lines changed: 5 additions & 4 deletions
@@ -114,11 +114,12 @@ type Lifecycler struct {
 	actorChan chan func()
 
 	// These values are initialised at startup, and never change
+	ID       string
+	Addr     string
+	RingName string
+	RingKey  string
 
-	ID       string
-	Addr     string
-	RingName string
-	RingKey  string
+	// Whether to flush if transfer fails on shutdown.
 	flushOnShutdown bool
 
 	// We need to remember the ingester state just in case consul goes away and comes
