From 6d08e49b15de38902ca65de432ea12a870d222a9 Mon Sep 17 00:00:00 2001 From: Cyril Tovena Date: Mon, 29 Jul 2019 13:50:05 -0400 Subject: [PATCH 1/2] Adds v11 schema to store label names within index. Stores only label names and not the entire metric. Storing the entire metric will bloat the index by 30%, and it doesn't really make sense to do it right now. Adding just label names adds a tolerable 7% to the index. Also, in Prometheus, we don't treat __name__ as a special label. We always return it when calling the /labels API, and we should do the same here. Signed-off-by: Cyril Tovena Signed-off-by: Goutham Veeramachaneni --- docs/single-process-config.yaml | 5 +- pkg/chunk/chunk_store_test.go | 5 +- pkg/chunk/chunk_store_utils.go | 8 ++- pkg/chunk/composite_store.go | 2 +- pkg/chunk/inmemory_storage_client.go | 1 + pkg/chunk/schema.go | 76 ++++++++++++++++++++++++++++ pkg/chunk/schema_caching.go | 8 +++ pkg/chunk/schema_config.go | 17 ++++--- pkg/chunk/schema_test.go | 55 ++++++++++++++++++++ pkg/chunk/series_store.go | 62 ++++++++++++++++++++++- 10 files changed, 221 insertions(+), 18 deletions(-) diff --git a/docs/single-process-config.yaml b/docs/single-process-config.yaml index 2d1847fcb0..13d5fc221a 100644 --- a/docs/single-process-config.yaml +++ b/docs/single-process-config.yaml @@ -51,10 +51,10 @@ ingester: # for the chunks. 
schema: configs: - - from: 2019-03-25 + - from: 2019-07-29 store: boltdb object_store: filesystem - schema: v10 + schema: v11 index: prefix: index_ period: 168h @@ -65,4 +65,3 @@ storage: filesystem: directory: /tmp/cortex/chunks - diff --git a/pkg/chunk/chunk_store_test.go b/pkg/chunk/chunk_store_test.go index 00e943bff2..6951f80881 100644 --- a/pkg/chunk/chunk_store_test.go +++ b/pkg/chunk/chunk_store_test.go @@ -37,6 +37,7 @@ var schemas = []struct { {"v6", true}, {"v9", true}, {"v10", true}, + {"v11", true}, } var stores = []struct { @@ -416,11 +417,11 @@ func TestChunkStore_LabelNamesForMetricName(t *testing.T) { }{ { `foo`, - []string{"bar", "flip", "toms"}, + []string{labels.MetricName, "bar", "flip", "toms"}, }, { `bar`, - []string{"bar", "toms"}, + []string{labels.MetricName, "bar", "toms"}, }, } { for _, schema := range schemas { diff --git a/pkg/chunk/chunk_store_utils.go b/pkg/chunk/chunk_store_utils.go index 9a5ef76ea3..1d83a0600b 100644 --- a/pkg/chunk/chunk_store_utils.go +++ b/pkg/chunk/chunk_store_utils.go @@ -42,11 +42,9 @@ func labelNamesFromChunks(chunks []Chunk) []string { var result []string for _, c := range chunks { for _, l := range c.Metric { - if l.Name != model.MetricNameLabel { - if _, ok := keys[string(l.Name)]; !ok { - keys[string(l.Name)] = struct{}{} - result = append(result, string(l.Name)) - } + if _, ok := keys[string(l.Name)]; !ok { + keys[string(l.Name)] = struct{}{} + result = append(result, string(l.Name)) } } } diff --git a/pkg/chunk/composite_store.go b/pkg/chunk/composite_store.go index f22aad0705..3680dc67ef 100644 --- a/pkg/chunk/composite_store.go +++ b/pkg/chunk/composite_store.go @@ -55,7 +55,7 @@ func (c *CompositeStore) AddPeriod(storeCfg StoreConfig, cfg PeriodConfig, index var store Store var err error switch cfg.Schema { - case "v9", "v10": + case "v9", "v10", "v11": store, err = newSeriesStore(storeCfg, schema, index, chunks, limits) default: store, err = newStore(storeCfg, schema, index, chunks, limits) diff 
--git a/pkg/chunk/inmemory_storage_client.go b/pkg/chunk/inmemory_storage_client.go index 56251a19f3..40603545ab 100644 --- a/pkg/chunk/inmemory_storage_client.go +++ b/pkg/chunk/inmemory_storage_client.go @@ -165,6 +165,7 @@ func (m *MockStorage) BatchWrite(ctx context.Context, batch WriteBatch) error { itemComponents := decodeRangeKey(items[i].rangeValue) if !bytes.Equal(itemComponents[3], metricNameRangeKeyV1) && !bytes.Equal(itemComponents[3], seriesRangeKeyV1) && + !bytes.Equal(itemComponents[3], metricConstRangeKeyV1) && !bytes.Equal(itemComponents[3], labelSeriesRangeKeyV1) { return fmt.Errorf("Dupe write") } diff --git a/pkg/chunk/schema.go b/pkg/chunk/schema.go index 8e2f5b8465..dd33379da9 100644 --- a/pkg/chunk/schema.go +++ b/pkg/chunk/schema.go @@ -7,6 +7,7 @@ import ( "fmt" "strings" + jsoniter "github.com/json-iterator/go" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/pkg/labels" ) @@ -22,6 +23,8 @@ var ( // For v9 schema seriesRangeKeyV1 = []byte{'7'} labelSeriesRangeKeyV1 = []byte{'8'} + // For v11 schema + metricConstRangeKeyV1 = []byte{'9'} // ErrNotSupported when a schema doesn't support that particular lookup. ErrNotSupported = errors.New("not supported") @@ -45,6 +48,8 @@ type Schema interface { // If the query resulted in series IDs, use this method to find chunks. GetChunksForSeries(from, through model.Time, userID string, seriesID []byte) ([]IndexQuery, error) + // Returns queries to retrieve the label names of a single series by id. 
+ GetLabelNamesForSeries(from, through model.Time, userID string, seriesID []byte) ([]IndexQuery, error) } // IndexQuery describes a query for entries @@ -196,6 +201,20 @@ func (s schema) GetChunksForSeries(from, through model.Time, userID string, seri return result, nil } +func (s schema) GetLabelNamesForSeries(from, through model.Time, userID string, seriesID []byte) ([]IndexQuery, error) { + var result []IndexQuery + + buckets := s.buckets(from, through, userID) + for _, bucket := range buckets { + entries, err := s.entries.GetLabelNamesForSeries(bucket, seriesID) + if err != nil { + return nil, err + } + result = append(result, entries...) + } + return result, nil +} + type entries interface { GetWriteEntries(bucket Bucket, metricName string, labels labels.Labels, chunkID string) ([]IndexEntry, error) GetLabelWriteEntries(bucket Bucket, metricName string, labels labels.Labels, chunkID string) ([]IndexEntry, error) @@ -205,6 +224,7 @@ type entries interface { GetReadMetricLabelQueries(bucket Bucket, metricName string, labelName string) ([]IndexQuery, error) GetReadMetricLabelValueQueries(bucket Bucket, metricName string, labelName string, labelValue string) ([]IndexQuery, error) GetChunksForSeries(bucket Bucket, seriesID []byte) ([]IndexQuery, error) + GetLabelNamesForSeries(bucket Bucket, seriesID []byte) ([]IndexQuery, error) } // original entries: @@ -276,6 +296,10 @@ func (originalEntries) GetChunksForSeries(_ Bucket, _ []byte) ([]IndexQuery, err return nil, ErrNotSupported } +func (originalEntries) GetLabelNamesForSeries(_ Bucket, _ []byte) ([]IndexQuery, error) { + return nil, ErrNotSupported +} + // v3Schema went to base64 encoded label values & a version ID // - range key: