From f360a18917e1bd32117ed391f6ef890dab1eabe1 Mon Sep 17 00:00:00 2001 From: Tom Wilkie Date: Wed, 4 Mar 2020 15:32:08 +0200 Subject: [PATCH 1/5] Make spread flushes and no jitter the defaults for single process. Signed-off-by: Tom Wilkie --- docs/configuration/single-process-config.yaml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/configuration/single-process-config.yaml b/docs/configuration/single-process-config.yaml index 3d6be91aa43..404f6c2d40f 100644 --- a/docs/configuration/single-process-config.yaml +++ b/docs/configuration/single-process-config.yaml @@ -28,7 +28,10 @@ ingester_client: use_gzip_compression: true ingester: - #chunk_idle_period: 15m + # We want our ingesters to flush chunks at the same time to optimise + # deduplication opportunities. + spreadflushes: true + chunkagejitter: 0 lifecycler: # The address to advertise for this ingester. Will be autodiscovered by From 3111f356499e7fd01a3ff2367c5cf8f3e09e236e Mon Sep 17 00:00:00 2001 From: Tom Wilkie Date: Wed, 4 Mar 2020 15:32:24 +0200 Subject: [PATCH 2/5] Make bigchunk encoding the default. Signed-off-by: Tom Wilkie --- pkg/chunk/encoding/factory.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/chunk/encoding/factory.go b/pkg/chunk/encoding/factory.go index 5ac314d9d0f..95f2a61ccdc 100644 --- a/pkg/chunk/encoding/factory.go +++ b/pkg/chunk/encoding/factory.go @@ -15,7 +15,7 @@ type Config struct{} var ( // DefaultEncoding exported for use in unit tests elsewhere - DefaultEncoding = DoubleDelta + DefaultEncoding = Bigchunk alwaysMarshalFullsizeChunks = true bigchunkSizeCapBytes = 0 ) From e706548563edff451ba4eba818f4cf074bec80d3 Mon Sep 17 00:00:00 2001 From: Tom Wilkie Date: Wed, 4 Mar 2020 16:22:22 +0200 Subject: [PATCH 3/5] Add to changelog (and reorder a couple of entries.) 
Signed-off-by: Tom Wilkie --- CHANGELOG.md | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4eb67a8a57..27034ed29c3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,19 +2,9 @@ ## master / unreleased -* [FEATURE] Fan out parallelizable queries to backend queriers concurrently. #1878 - * `querier.parallelise-shardable-queries` (bool) - * Requires a shard-compatible schema (v10+) - * This causes the number of traces to increase accordingly. - * The query-frontend now requires a schema config to determine how/when to shard queries, either from a file or from flags (i.e. by the `config-yaml` CLI flag). This is the same schema config the queriers consume. The schema is only required to use this option. - * It's also advised to increase downstream concurrency controls as well: - * `querier.max-outstanding-requests-per-tenant` - * `querier.max-query-parallelism` - * `querier.max-concurrent` - * `server.grpc-max-concurrent-streams` (for both query-frontends and queriers) * [CHANGE] The frontend http server will now send 502 in case of deadline exceeded and 499 if the user requested cancellation. #2156 * [CHANGE] Config file changed to remove top level `config_store` field in favor of a nested `configdb` field. #2125 -* [CHANGE] We now enforce queries to be up to `-querier.max-query-into-future` into the future (defaults to 10m). #1929 +* [CHANGE] We now enforce queries to be up to `-querier.max-query-into-future` into the future (defaults to 10m). #1929 * [CHANGE] Removed unnecessary `frontend.cache-split-interval` in favor of `querier.split-queries-by-interval` both to reduce configuration complexity and guarantee alignment of these two configs. 
Starting from now, `-querier.cache-results` may only be enabled in conjunction with `-querier.split-queries-by-interval` (previously the cache interval default was `24h` so if you want to preserve the same behaviour you should set `-querier.split-queries-by-interval=24h`). #2040 * [CHANGE] Removed remaining support for using denormalised tokens in the ring. If you're still running ingesters with denormalised tokens (Cortex 0.4 or earlier, with `-ingester.normalise-tokens=false`), such ingesters will now be completely invisible to distributors and need to be either switched to Cortex 0.6.0 or later, or be configured to use normalised tokens. #2034 * [CHANGE] Moved `--store.min-chunk-age` to the Querier config as `--querier.query-store-after`, allowing the store to be skipped during query time if the metrics wouldn't be found. The YAML config option `ingestermaxquerylookback` has been renamed to `query_ingesters_within` to match its CLI flag. #1893 @@ -39,6 +29,7 @@ * [CHANGE] Cortex now has /ready probe for all services, not just ingester and querier as before. In single-binary mode, /ready reports 204 only if all components are running properly. #2166 * [CHANGE] Experimental TSDB: switched the blocks storage index header to the binary format. This change is expected to have no visible impact, except lower startup times and memory usage in the queriers. It's possible to switch back to the old JSON format via the flag `-experimental.tsdb.bucket-store.binary-index-header-enabled=false`. #2223 * [CHANGE] WAL replays are now done while the rest of Cortex is starting, and more specifically, when HTTP server is running. This makes it possible to scrape metrics during WAL replays. Applies to both chunks and experimental blocks storage. #2222 +* [CHANGE] Default to BigChunk encoding; may results in slightly high disk usage if many of you series have a constant value, but should generally result in fewer, bigger chunks. 
#2207 * [FEATURE] Added a read-only local alertmanager config store using files named corresponding to their tenant id. #2125 * [FEATURE] Added user sub rings to distribute users to a subset of ingesters. #1947 * `--experimental.distributor.user-subring-size` @@ -55,6 +46,16 @@ * [FEATURE] Add flag `-experimental.tsdb.stripe-size` to expose TSDB stripe size option. #2185 * [FEATURE] Experimental Delete Series: Added support for Deleting Series with Prometheus style API. Needs to be enabled first by setting `--purger.enable` to `true`. Deletion only supported when using `boltdb` and `filesystem` as index and object store respectively. Support for other stores to follow in separate PRs #2103 * [FEATURE] Introduced new standalone service `query-tee` that can be used for testing purposes to send the same Prometheus query to multiple backends (ie. two Cortex clusters ingesting the same metrics) and compare the performances. #2203 +* [FEATURE] Fan out parallelizable queries to backend queriers concurrently. #1878 + * `querier.parallelise-shardable-queries` (bool) + * Requires a shard-compatible schema (v10+) + * This causes the number of traces to increase accordingly. + * The query-frontend now requires a schema config to determine how/when to shard queries, either from a file or from flags (i.e. by the `config-yaml` CLI flag). This is the same schema config the queriers consume. The schema is only required to use this option. + * It's also advised to increase downstream concurrency controls as well: + * `querier.max-outstanding-requests-per-tenant` + * `querier.max-query-parallelism` + * `querier.max-concurrent` + * `server.grpc-max-concurrent-streams` (for both query-frontends and queriers) * [ENHANCEMENT] Alertmanager: Expose Per-tenant alertmanager metrics #2124 * [ENHANCEMENT] Add `status` label to `cortex_alertmanager_configs` metric to gauge the number of valid and invalid configs. 
#2125 * [ENHANCEMENT] Cassandra Authentication: added the `custom_authenticators` config option that allows users to authenticate with cassandra clusters using password authenticators that are not approved by default in [gocql](https://github.com/gocql/gocql/blob/81b8263d9fe526782a588ef94d3fa5c6148e5d67/conn.go#L27) #2093 From 869fe64b086162d13dc5daf7b3ea5608e83f042a Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Mon, 9 Mar 2020 10:24:42 +0100 Subject: [PATCH 4/5] Update CHANGELOG.md Co-Authored-By: Bryan Boreham --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 27034ed29c3..cb9986e5907 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,7 @@ * [CHANGE] Cortex now has /ready probe for all services, not just ingester and querier as before. In single-binary mode, /ready reports 204 only if all components are running properly. #2166 * [CHANGE] Experimental TSDB: switched the blocks storage index header to the binary format. This change is expected to have no visible impact, except lower startup times and memory usage in the queriers. It's possible to switch back to the old JSON format via the flag `-experimental.tsdb.bucket-store.binary-index-header-enabled=false`. #2223 * [CHANGE] WAL replays are now done while the rest of Cortex is starting, and more specifically, when HTTP server is running. This makes it possible to scrape metrics during WAL replays. Applies to both chunks and experimental blocks storage. #2222 -* [CHANGE] Default to BigChunk encoding; may results in slightly high disk usage if many of you series have a constant value, but should generally result in fewer, bigger chunks. #2207 +* [CHANGE] Default to BigChunk encoding; may result in slightly higher disk usage if many timeseries have a constant value, but should generally result in fewer, bigger chunks. #2207 * [FEATURE] Added a read-only local alertmanager config store using files named corresponding to their tenant id. 
#2125 * [FEATURE] Added user sub rings to distribute users to a subset of ingesters. #1947 * `--experimental.distributor.user-subring-size` From 44ada476fc4c633528a24e0b9019fb46eb287488 Mon Sep 17 00:00:00 2001 From: Marco Pracucci Date: Mon, 9 Mar 2020 14:38:27 +0100 Subject: [PATCH 5/5] Fixed tests Signed-off-by: Marco Pracucci --- pkg/chunk/cache/cache_test.go | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/pkg/chunk/cache/cache_test.go b/pkg/chunk/cache/cache_test.go index dcbaaae7972..ac586c74ec0 100644 --- a/pkg/chunk/cache/cache_test.go +++ b/pkg/chunk/cache/cache_test.go @@ -52,9 +52,24 @@ func fillCache(t *testing.T, cache cache.Cache) ([]string, []chunk.Chunk) { buf, err := c.Encoded() require.NoError(t, err) + // In order to be able to compare the expected chunk (this one) with the + // actual one (the one that will be fetched from the cache) we need to + // clean up the chunk to avoid any internal reference mismatch (i.e. appender + // pointer). + cleanChunk := chunk.Chunk{ + UserID: c.UserID, + Fingerprint: c.Fingerprint, + From: c.From, + Through: c.Through, + Checksum: c.Checksum, + ChecksumSet: c.ChecksumSet, + } + err = cleanChunk.Decode(chunk.NewDecodeContext(), buf) + require.NoError(t, err) + keys = append(keys, c.ExternalKey()) bufs = append(bufs, buf) - chunks = append(chunks, c) + chunks = append(chunks, cleanChunk) } cache.Store(context.Background(), keys, bufs) @@ -75,7 +90,7 @@ func testCacheSingle(t *testing.T, cache cache.Cache, keys []string, chunks []ch require.NoError(t, err) err = c.Decode(chunk.NewDecodeContext(), bufs[0]) require.NoError(t, err) - require.Equal(t, c, chunks[index]) + require.Equal(t, chunks[index], c) } }