diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e02a1bc28b..ee9ed7e069d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -106,6 +106,7 @@ * [ENHANCEMENT] Add `-cassandra.num-connections` to allow increasing the number of TCP connections to each Cassandra server. #2666 * [ENHANCEMENT] Use separate Cassandra clients and connections for reads and writes. #2666 * [ENHANCEMENT] Add `-cassandra.reconnect-interval` to allow specifying the reconnect interval to a Cassandra server that has been marked `DOWN` by the gocql driver. Also change the default value of the reconnect interval from `60s` to `1s`. #2687 +* [ENHANCEMENT] Experimental TSDB: Applied a jitter to the period bucket scans in order to better distribute bucket operations over the time and increase the probability of hitting the shared cache (if configured). #2693 * [BUGFIX] Ruler: Ensure temporary rule files with special characters are properly mapped and cleaned up. #2506 * [BUGFIX] Fixes #2411, Ensure requests are properly routed to the prometheus api embedded in the query if `-server.path-prefix` is set. #2372 * [BUGFIX] Experimental TSDB: fixed chunk data corruption when querying back series using the experimental blocks storage. #2400 diff --git a/pkg/querier/blocks_scanner.go b/pkg/querier/blocks_scanner.go index b52a2359f56..c98ce3dd23a 100644 --- a/pkg/querier/blocks_scanner.go +++ b/pkg/querier/blocks_scanner.go @@ -88,7 +88,10 @@ func NewBlocksScanner(cfg BlocksScannerConfig, bucketClient objstore.Bucket, log prometheus.WrapRegistererWith(prometheus.Labels{"component": "querier"}, reg).MustRegister(d.fetchersMetrics) } - d.Service = services.NewTimerService(cfg.ScanInterval, d.starting, d.scan, nil) + // Apply a jitter to the sync frequency in order to increase the probability + // of hitting the shared cache (if any). + scanInterval := util.DurationWithJitter(cfg.ScanInterval, 0.2) + d.Service = services.NewTimerService(scanInterval, d.starting, d.scan, nil) return d } diff --git a/pkg/storegateway/gateway.go b/pkg/storegateway/gateway.go index 49609b0af81..8ce68c660a6 100644 --- a/pkg/storegateway/gateway.go +++ b/pkg/storegateway/gateway.go @@ -20,6 +20,7 @@ import ( "github.com/cortexproject/cortex/pkg/ring/kv" cortex_tsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" "github.com/cortexproject/cortex/pkg/storegateway/storegatewaypb" + "github.com/cortexproject/cortex/pkg/util" "github.com/cortexproject/cortex/pkg/util/services" ) @@ -226,12 +227,14 @@ func (g *StoreGateway) running(ctx context.Context) error { var ringTickerChan <-chan time.Time var ringLastState ring.ReplicationSet - syncTicker := time.NewTicker(g.storageCfg.BucketStore.SyncInterval) + // Apply a jitter to the sync frequency in order to increase the probability + // of hitting the shared cache (if any). + syncTicker := time.NewTicker(util.DurationWithJitter(g.storageCfg.BucketStore.SyncInterval, 0.2)) defer syncTicker.Stop() if g.gatewayCfg.ShardingEnabled { ringLastState, _ = g.ring.GetAll(ring.BlocksSync) // nolint:errcheck - ringTicker := time.NewTicker(g.gatewayCfg.ShardingRing.RingCheckPeriod) + ringTicker := time.NewTicker(util.DurationWithJitter(g.gatewayCfg.ShardingRing.RingCheckPeriod, 0.2)) defer ringTicker.Stop() ringTickerChan = ringTicker.C } diff --git a/pkg/util/time.go b/pkg/util/time.go index fc2b46a1f79..aeed6ab70d1 100644 --- a/pkg/util/time.go +++ b/pkg/util/time.go @@ -2,6 +2,7 @@ package util import ( "math" + "math/rand" "net/http" "strconv" "time" @@ -34,3 +35,10 @@ func ParseTime(s string) (int64, error) { } return 0, httpgrpc.Errorf(http.StatusBadRequest, "cannot parse %q to a valid timestamp", s) } + +func DurationWithJitter(input time.Duration, variancePerc float64) time.Duration { + variance := int64(float64(input) * variancePerc) + jitter := rand.Int63n(variance*2) - variance + + return input + time.Duration(jitter) +} diff --git a/pkg/util/time_test.go b/pkg/util/time_test.go index 9899c4bdbb7..ba160145a34 100644 --- a/pkg/util/time_test.go +++ b/pkg/util/time_test.go @@ -4,6 +4,7 @@ import ( "testing" "time" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -23,3 +24,13 @@ func TestTimeFromMillis(t *testing.T) { }) } } + +func TestDurationWithJitter(t *testing.T) { + const numRuns = 1000 + + for i := 0; i < numRuns; i++ { + actual := DurationWithJitter(time.Minute, 0.5) + assert.GreaterOrEqual(t, int64(actual), int64(30*time.Second)) + assert.LessOrEqual(t, int64(actual), int64(90*time.Second)) + } +}