Skip to content

Commit 14e0746

Browse files
committed
Added cached collector.
Signed-off-by: Bartlomiej Plotka <[email protected]> update. Signed-off-by: Bartlomiej Plotka <[email protected]> Attempt 2 Signed-off-by: Bartlomiej Plotka <[email protected]> Added blocking registry, with raw collector and transactional handler. Signed-off-by: Bartlomiej Plotka <[email protected]> Added fast path to normal (empty) registry to save 8 allocs and 3K5B per Gather. Signed-off-by: Bartlomiej Plotka <[email protected]> Simplified API, added tests. Signed-off-by: Bartlomiej Plotka <[email protected]> Fix. Signed-off-by: Bartlomiej Plotka <[email protected]> Simplified implementation. Signed-off-by: Bartlomiej Plotka <[email protected]> Added benchmark. Signed-off-by: Bartlomiej Plotka <[email protected]> Optimized. Signed-off-by: Bartlomiej Plotka <[email protected]>
1 parent f63e219 commit 14e0746

File tree

11 files changed

+785
-34
lines changed

11 files changed

+785
-34
lines changed

prometheus/cache/cache.go

Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
// Copyright 2022 The Prometheus Authors
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package cache
15+
16+
import (
17+
"errors"
18+
"fmt"
19+
"sort"
20+
"sync"
21+
"time"
22+
23+
"github.com/cespare/xxhash/v2"
24+
"github.com/prometheus/client_golang/prometheus"
25+
"github.com/prometheus/common/model"
26+
27+
//nolint:staticcheck // Ignore SA1019. Need to keep deprecated package for compatibility.
28+
"github.com/golang/protobuf/proto"
29+
"github.com/prometheus/client_golang/prometheus/internal"
30+
dto "github.com/prometheus/client_model/go"
31+
)
32+
33+
var _ prometheus.TransactionalGatherer = &CachedTGatherer{}
34+
35+
var separatorByteSlice = []byte{model.SeparatorByte} // For convenient use with xxhash.
36+
37+
// CachedTGatherer is a transactional gatherer that allows maintaining set of metrics which
38+
// change less frequently than scrape time, yet label values and values change over time.
39+
//
40+
// If you happen to use NewDesc, NewConstMetric or MustNewConstMetric inside Collector.Collect routine, consider
41+
// using CachedTGatherer instead.
42+
//
43+
// Use CachedTGatherer with classic Registry using NewMultiTRegistry and ToTransactionalGatherer helpers.
44+
// NOTE(bwplotka): Experimental, API and behaviour can change.
45+
type CachedTGatherer struct {
46+
metrics map[uint64]*dto.Metric
47+
metricFamilyByName map[string]*dto.MetricFamily
48+
mMu sync.RWMutex
49+
}
50+
51+
func NewCachedTGatherer() *CachedTGatherer {
52+
return &CachedTGatherer{
53+
metrics: make(map[uint64]*dto.Metric),
54+
metricFamilyByName: map[string]*dto.MetricFamily{},
55+
}
56+
}
57+
58+
// Gather implements TransactionalGatherer interface.
59+
func (c *CachedTGatherer) Gather() (_ []*dto.MetricFamily, done func(), err error) {
60+
c.mMu.RLock()
61+
62+
// BenchmarkCachedTGatherer_Update shows, even for 1 million metrics with 1000 families
63+
// this is efficient enough (~300µs and ~50 kB per op), no need to cache it for now.
64+
return internal.NormalizeMetricFamilies(c.metricFamilyByName), c.mMu.RUnlock, nil
65+
}
66+
67+
// Key identifies a single cached metric: its fully-qualified name plus its label set.
type Key struct {
	// FQName is the metric name (the __name__ label).
	FQName string

	// LabelNames may be passed in any order; label pairs are sorted when a metric
	// is first cached. The hash of a Key is order-sensitive though, so a consumer
	// that supplies the same labels in a varying order will not hit the same
	// cache entry.
	LabelNames []string

	// LabelValues holds one value per entry of LabelNames, matched by index.
	LabelValues []string
}
75+
76+
func (k Key) isValid() error {
77+
if k.FQName == "" {
78+
return errors.New("FQName cannot be empty")
79+
}
80+
if len(k.LabelNames) != len(k.LabelValues) {
81+
return errors.New("new metric: label name has different length than values")
82+
}
83+
84+
return nil
85+
}
86+
87+
// hash returns unique hash for this key.
88+
func (k Key) hash() uint64 {
89+
h := xxhash.New()
90+
h.WriteString(k.FQName)
91+
h.Write(separatorByteSlice)
92+
93+
for i := range k.LabelNames {
94+
h.WriteString(k.LabelNames[i])
95+
h.Write(separatorByteSlice)
96+
h.WriteString(k.LabelValues[i])
97+
h.Write(separatorByteSlice)
98+
}
99+
return h.Sum64()
100+
}
101+
102+
// Insert represents record to set in cache.
103+
type Insert struct {
104+
Key
105+
106+
Help string
107+
ValueType prometheus.ValueType
108+
Value float64
109+
110+
// Timestamp is optional. Pass nil for no explicit timestamp.
111+
Timestamp *time.Time
112+
}
113+
114+
// Update goes through inserts and deletions and updates current cache in concurrency safe manner.
115+
// If reset is set to true, all inserts and deletions are working on empty cache. In such case
116+
// this implementation tries to reuse memory from existing cached item when possible.
117+
//
118+
// Update reuses insert struct memory, so after use, Insert slice and its elements cannot be reused
119+
// outside of this method.
120+
// TODO(bwplotka): Lack of copying can pose memory safety problems if insert variables are reused. Consider copying if value
121+
// is different. Yet it gives significant allocation gains.
122+
func (c *CachedTGatherer) Update(reset bool, inserts []Insert, deletions []Key) error {
123+
c.mMu.Lock()
124+
defer c.mMu.Unlock()
125+
126+
currMetrics := c.metrics
127+
currMetricFamilies := c.metricFamilyByName
128+
if reset {
129+
currMetrics = make(map[uint64]*dto.Metric, len(c.metrics))
130+
currMetricFamilies = make(map[string]*dto.MetricFamily, len(c.metricFamilyByName))
131+
}
132+
133+
errs := prometheus.MultiError{}
134+
for i := range inserts {
135+
// TODO(bwplotka): Validate more about this insert?
136+
if err := inserts[i].isValid(); err != nil {
137+
errs.Append(err)
138+
continue
139+
}
140+
141+
// Update metric family.
142+
mf, ok := c.metricFamilyByName[inserts[i].FQName]
143+
if !ok {
144+
mf = &dto.MetricFamily{}
145+
mf.Name = &inserts[i].FQName
146+
} else if reset {
147+
// Reset metric slice, since we want to start from scratch.
148+
mf.Metric = mf.Metric[:0]
149+
}
150+
mf.Type = inserts[i].ValueType.ToDTO()
151+
mf.Help = &inserts[i].Help
152+
153+
currMetricFamilies[inserts[i].FQName] = mf
154+
155+
// Update metric pointer.
156+
hSum := inserts[i].hash()
157+
m, ok := c.metrics[hSum]
158+
if !ok {
159+
m = &dto.Metric{Label: make([]*dto.LabelPair, 0, len(inserts[i].LabelNames))}
160+
for j := range inserts[i].LabelNames {
161+
m.Label = append(m.Label, &dto.LabelPair{
162+
Name: &inserts[i].LabelNames[j],
163+
Value: &inserts[i].LabelValues[j],
164+
})
165+
}
166+
sort.Sort(internal.LabelPairSorter(m.Label))
167+
}
168+
169+
switch inserts[i].ValueType {
170+
case prometheus.CounterValue:
171+
v := m.Counter
172+
if v == nil {
173+
v = &dto.Counter{}
174+
}
175+
v.Value = &inserts[i].Value
176+
m.Counter = v
177+
m.Gauge = nil
178+
m.Untyped = nil
179+
case prometheus.GaugeValue:
180+
v := m.Gauge
181+
if v == nil {
182+
v = &dto.Gauge{}
183+
}
184+
v.Value = &inserts[i].Value
185+
m.Counter = nil
186+
m.Gauge = v
187+
m.Untyped = nil
188+
case prometheus.UntypedValue:
189+
v := m.Untyped
190+
if v == nil {
191+
v = &dto.Untyped{}
192+
}
193+
v.Value = &inserts[i].Value
194+
m.Counter = nil
195+
m.Gauge = nil
196+
m.Untyped = v
197+
default:
198+
return fmt.Errorf("unsupported value type %v", inserts[i].ValueType)
199+
}
200+
201+
m.TimestampMs = nil
202+
if inserts[i].Timestamp != nil {
203+
m.TimestampMs = proto.Int64(inserts[i].Timestamp.Unix()*1000 + int64(inserts[i].Timestamp.Nanosecond()/1000000))
204+
}
205+
currMetrics[hSum] = m
206+
207+
if !reset && ok {
208+
// If we did update without reset and we found metric in previous
209+
// map, we know metric pointer exists in metric family map, so just continue.
210+
continue
211+
}
212+
213+
// Will be sorted later anyway, so just append.
214+
mf.Metric = append(mf.Metric, m)
215+
}
216+
217+
for _, del := range deletions {
218+
if err := del.isValid(); err != nil {
219+
errs.Append(err)
220+
continue
221+
}
222+
223+
hSum := del.hash()
224+
m, ok := currMetrics[hSum]
225+
if !ok {
226+
continue
227+
}
228+
delete(currMetrics, hSum)
229+
230+
mf, ok := currMetricFamilies[del.FQName]
231+
if !ok {
232+
// Impossible, but well...
233+
errs.Append(fmt.Errorf("could not remove metric %s(%s) from metric family, metric family does not exists", del.FQName, del.LabelValues))
234+
continue
235+
}
236+
237+
toDel := -1
238+
for i := range mf.Metric {
239+
if mf.Metric[i] == m {
240+
toDel = i
241+
break
242+
}
243+
}
244+
245+
if toDel == -1 {
246+
errs.Append(fmt.Errorf("could not remove metric %s(%s) from metric family, metric family does not have such metric", del.FQName, del.LabelValues))
247+
continue
248+
}
249+
250+
if len(mf.Metric) == 1 {
251+
delete(currMetricFamilies, del.FQName)
252+
continue
253+
}
254+
255+
mf.Metric = append(mf.Metric[:toDel], mf.Metric[toDel+1:]...)
256+
}
257+
258+
c.metrics = currMetrics
259+
c.metricFamilyByName = currMetricFamilies
260+
return errs.MaybeUnwrap()
261+
}

0 commit comments

Comments
 (0)