cortexproject · pracucci · Mar 19, 2020 · Mar 12, 2020 · Mar 18, 2020 · Mar 18, 2020
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -149,6 +149,7 @@ jobs:
           docker pull quay.io/cortexproject/cortex:v0.6.0
           docker pull shopify/bigtable-emulator:0.1.0
           docker pull rinscy/cassandra:3.11.0
+          docker pull memcached:1.6.1
     - run:
         name: Integration Tests
         command: |

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -12,6 +12,7 @@
   * `-flusher.flush-op-timeout` is duration after which a flush should timeout.
 * [ENHANCEMENT] Better re-use of connections to DynamoDB and S3. #2268
 * [ENHANCEMENT] Experimental TSDB: Add support for local `filesystem` backend. #2245
+* [ENHANCEMENT] Experimental TSDB: Added memcached support for the TSDB index cache. #2290
 * [ENHANCEMENT] Allow 1w (where w denotes week) and 1y (where y denotes year) when setting table period and retention. #2252 
 * [ENHANCEMENT] Added FIFO cache metrics for current number of entries and memory usage. #2270
 * [ENHANCEMENT] Output all config fields to /config API, including those with empty value. #2209

diff --git a/development/tsdb-blocks-storage-s3/config/cortex.yaml b/development/tsdb-blocks-storage-s3/config/cortex.yaml
@@ -39,6 +39,11 @@ tsdb:
   bucket_store:
     sync_dir: /tmp/cortex-tsdb-querier
 
+    index_cache:
+      backend: memcached
+      memcached:
+        addresses: dns+memcached:11211
+
   s3:
     endpoint:          minio:9000
     bucket_name:       cortex-tsdb

diff --git a/development/tsdb-blocks-storage-s3/docker-compose.yml b/development/tsdb-blocks-storage-s3/docker-compose.yml
@@ -18,6 +18,9 @@ services:
     volumes:
       - .data-minio:/data:delegated
 
+  memcached:
+    image: memcached:1.6
+
   configstore:
     image: nginx
     volumes:

diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md
@@ -2227,11 +2227,6 @@ bucket_store:
   # CLI flag: -experimental.tsdb.bucket-store.sync-interval
   [sync_interval: <duration> | default = 5m0s]
 
-  # Size in bytes of in-memory index cache used to speed up blocks index lookups
-  # (shared between all tenants).
-  # CLI flag: -experimental.tsdb.bucket-store.index-cache-size-bytes
-  [index_cache_size_bytes: <int> | default = 1073741824]
-
   # Max size - in bytes - of a per-tenant chunk pool, used to reduce memory
   # allocations.
   # CLI flag: -experimental.tsdb.bucket-store.max-chunk-pool-bytes
@@ -2271,6 +2266,54 @@ bucket_store:
   # CLI flag: -experimental.tsdb.bucket-store.consistency-delay
   [consistency_delay: <duration> | default = 0s]
 
+  index_cache:
+    # The index cache backend type. Supported values: inmemory, memcached.
+    # CLI flag: -experimental.tsdb.bucket-store.index-cache.backend
+    [backend: <string> | default = "inmemory"]
+
+    inmemory:
+      # Maximum size in bytes of in-memory index cache used to speed up blocks
+      # index lookups (shared between all tenants).
+      # CLI flag: -experimental.tsdb.bucket-store.index-cache.inmemory.max-size-bytes
+      [max_size_bytes: <int> | default = 1073741824]
+
+    memcached:
+      # Comma separated list of memcached addresses. Supported prefixes are:
+      # dns+ (looked up as an A/AAAA query), dnssrv+ (looked up as a SRV query),
+      # dnssrvnoa+ (looked up as a SRV query, with no A/AAAA lookup made after
+      # that).
+      # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.addresses
+      [addresses: <string> | default = ""]
+
+      # The socket read/write timeout.
+      # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.timeout
+      [timeout: <duration> | default = 100ms]
+
+      # The maximum number of idle connections that will be maintained per
+      # address.
+      # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-idle-connections
+      [max_idle_connections: <int> | default = 16]
+
+      # The maximum number of concurrent asynchronous operations that can occur.
+      # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-async-concurrency
+      [max_async_concurrency: <int> | default = 50]
+
+      # The maximum number of enqueued asynchronous operations allowed.
+      # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-async-buffer-size
+      [max_async_buffer_size: <int> | default = 10000]
+
+      # The maximum number of concurrent connections running get operations. If
+      # set to 0, concurrency is unlimited.
+      # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-get-multi-concurrency
+      [max_get_multi_concurrency: <int> | default = 100]
+
+      # The maximum number of keys a single underlying get operation should run.
+      # If more keys are specified, internally keys are split into multiple
+      # batches and fetched concurrently, honoring the max concurrency. If set
+      # to 0, the max batch size is unlimited.
+      # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-get-multi-batch-size
+      [max_get_multi_batch_size: <int> | default = 0]
+
 # How frequently does Cortex try to compact TSDB head. Block is only created if
 # data covers smallest block range. Must be greater than 0 and max 5 minutes.
 # CLI flag: -experimental.tsdb.head-compaction-interval

diff --git a/docs/operations/blocks-storage.md b/docs/operations/blocks-storage.md
@@ -72,6 +72,37 @@ Whenever the pool of compactors increase or decrease (ie. following up a scale u
 - `GET /compactor_ring`<br />
   Displays the status of the compactors ring, including the tokens owned by each compactor and an option to remove (forget) instances from the ring.
 
+## Index cache
+
+The querier supports a cache to speed up postings and series lookups from TSDB blocks indexes. Two backends are supported:
+
+- `inmemory`
+- `memcached`
+
+### In-memory index cache
+
+The `inmemory` index cache is **enabled by default** and its max size can be configured through the flag `-experimental.tsdb.bucket-store.index-cache.inmemory.max-size-bytes` (or config file). The trade-off of using the in-memory index cache is:
+
+- Pros: zero latency
+- Cons: increased querier memory usage, not shared across multiple querier replicas
+
+### Memcached index cache
+
+The `memcached` index cache allows using [Memcached](https://memcached.org/) as the cache backend. This cache backend is configured using `-experimental.tsdb.bucket-store.index-cache.backend=memcached` and requires the Memcached server addresses via `-experimental.tsdb.bucket-store.index-cache.memcached.addresses` (or config file). The addresses are resolved using the [DNS service provider](dns-service-discovery.md).
+
+The trade-off of using the Memcached index cache is:
+
+- Pros: can scale beyond a single node memory (Memcached cluster), shared across multiple querier instances
+- Cons: higher latency in the cache round trip compared to the in-memory one
+
+The Memcached client uses a jump hash algorithm to shard cached entries across a cluster of Memcached servers. For this reason, you should make sure memcached servers are **not** behind any kind of load balancer and their address is configured so that servers are added/removed to the end of the list whenever a scale up/down occurs.
+
+For example, if you're running Memcached in Kubernetes, you may:
+
+1. Deploy your Memcached cluster using a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/)
+2. Create a [headless service](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services) for the Memcached StatefulSet
+3. Configure Cortex's Memcached client address using the `dnssrvnoa+` [service discovery](dns-service-discovery.md)
+
 ## Configuration
 
 The general [configuration documentation](../configuration/_index.md) also applied to a Cortex cluster running the blocks storage, with few differences:
@@ -134,11 +165,6 @@ tsdb:
     # CLI flag: -experimental.tsdb.bucket-store.sync-interval
     [sync_interval: <duration> | default = 5m0s]
 
-    # Size in bytes of in-memory index cache used to speed up blocks index
-    # lookups (shared between all tenants).
-    # CLI flag: -experimental.tsdb.bucket-store.index-cache-size-bytes
-    [index_cache_size_bytes: <int> | default = 1073741824]
-
     # Max size - in bytes - of a per-tenant chunk pool, used to reduce memory
     # allocations.
     # CLI flag: -experimental.tsdb.bucket-store.max-chunk-pool-bytes
@@ -178,6 +204,54 @@ tsdb:
     # CLI flag: -experimental.tsdb.bucket-store.consistency-delay
     [consistency_delay: <duration> | default = 0s]
 
+    index_cache:
+      # The index cache backend type. Supported values: inmemory, memcached.
+      # CLI flag: -experimental.tsdb.bucket-store.index-cache.backend
+      [backend: <string> | default = "inmemory"]
+
+      inmemory:
+        # Maximum size in bytes of in-memory index cache used to speed up blocks
+        # index lookups (shared between all tenants).
+        # CLI flag: -experimental.tsdb.bucket-store.index-cache.inmemory.max-size-bytes
+        [max_size_bytes: <int> | default = 1073741824]
+
+      memcached:
+        # Comma separated list of memcached addresses. Supported prefixes are:
+        # dns+ (looked up as an A/AAAA query), dnssrv+ (looked up as a SRV
+        # query), dnssrvnoa+ (looked up as a SRV query, with no A/AAAA lookup
+        # made after that).
+        # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.addresses
+        [addresses: <string> | default = ""]
+
+        # The socket read/write timeout.
+        # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.timeout
+        [timeout: <duration> | default = 100ms]
+
+        # The maximum number of idle connections that will be maintained per
+        # address.
+        # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-idle-connections
+        [max_idle_connections: <int> | default = 16]
+
+        # The maximum number of concurrent asynchronous operations that can occur.
+        # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-async-concurrency
+        [max_async_concurrency: <int> | default = 50]
+
+        # The maximum number of enqueued asynchronous operations allowed.
+        # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-async-buffer-size
+        [max_async_buffer_size: <int> | default = 10000]
+
+        # The maximum number of concurrent connections running get operations.
+        # If set to 0, concurrency is unlimited.
+        # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-get-multi-concurrency
+        [max_get_multi_concurrency: <int> | default = 100]
+
+        # The maximum number of keys a single underlying get operation should
+        # run. If more keys are specified, internally keys are split into
+        # multiple batches and fetched concurrently, honoring the max
+        # concurrency. If set to 0, the max batch size is unlimited.
+        # CLI flag: -experimental.tsdb.bucket-store.index-cache.memcached.max-get-multi-batch-size
+        [max_get_multi_batch_size: <int> | default = 0]
+
   # How frequently does Cortex try to compact TSDB head. Block is only created
   # if data covers smallest block range. Must be greater than 0 and max 5
   # minutes.

diff --git a/docs/operations/blocks-storage.template b/docs/operations/blocks-storage.template
@@ -72,6 +72,37 @@ Whenever the pool of compactors increase or decrease (ie. following up a scale u
 - `GET /compactor_ring`<br />
   Displays the status of the compactors ring, including the tokens owned by each compactor and an option to remove (forget) instances from the ring.
 
+## Index cache
+
+The querier supports a cache to speed up postings and series lookups from TSDB blocks indexes. Two backends are supported:
+
+- `inmemory`
+- `memcached`
+
+### In-memory index cache
+
+The `inmemory` index cache is **enabled by default** and its max size can be configured through the flag `-experimental.tsdb.bucket-store.index-cache.inmemory.max-size-bytes` (or config file). The trade-off of using the in-memory index cache is:
+
+- Pros: zero latency
+- Cons: increased querier memory usage, not shared across multiple querier replicas
+
+### Memcached index cache
+
+The `memcached` index cache allows using [Memcached](https://memcached.org/) as the cache backend. This cache backend is configured using `-experimental.tsdb.bucket-store.index-cache.backend=memcached` and requires the Memcached server addresses via `-experimental.tsdb.bucket-store.index-cache.memcached.addresses` (or config file). The addresses are resolved using the [DNS service provider](dns-service-discovery.md).
+
+The trade-off of using the Memcached index cache is:
+
+- Pros: can scale beyond a single node memory (Memcached cluster), shared across multiple querier instances
+- Cons: higher latency in the cache round trip compared to the in-memory one
+
+The Memcached client uses a jump hash algorithm to shard cached entries across a cluster of Memcached servers. For this reason, you should make sure memcached servers are **not** behind any kind of load balancer and their address is configured so that servers are added/removed to the end of the list whenever a scale up/down occurs.
+
+For example, if you're running Memcached in Kubernetes, you may:
+
+1. Deploy your Memcached cluster using a [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/)
+2. Create a [headless service](https://kubernetes.io/docs/concepts/services-networking/service/#headless-services) for the Memcached StatefulSet
+3. Configure Cortex's Memcached client address using the `dnssrvnoa+` [service discovery](dns-service-discovery.md)
+
 ## Configuration
 
 The general [configuration documentation](../configuration/_index.md) also applied to a Cortex cluster running the blocks storage, with few differences:

diff --git a/docs/operations/dns-service-discovery.md b/docs/operations/dns-service-discovery.md
@@ -0,0 +1,21 @@
+---
+title: "DNS service discovery"
+linkTitle: "DNS service discovery"
+weight: 2
+slug: dns-service-discovery
+---
+
+Some clients in Cortex support service discovery via DNS to find addresses of backend servers to connect to (e.g. caching servers). The clients supporting it are:
+
+- [Blocks storage's memcached index cache](blocks-storage.md#memcached-index-cache)
+
+## Supported discovery modes
+
+The DNS service discovery supports different discovery modes. A discovery mode is selected by adding a specific prefix to the address. The supported prefixes are:
+
+- **`dns+`**<br />
+  The domain name after the prefix is looked up as an A/AAAA query. For example: `dns+memcached.local:11211`
+- **`dnssrv+`**<br />
+  The domain name after the prefix is looked up as a SRV query, and then each SRV record is resolved as an A/AAAA record. For example: `dnssrv+memcached.namespace.svc.cluster.local`
+- **`dnssrvnoa+`**<br />
+  The domain name after the prefix is looked up as a SRV query, with no A/AAAA lookup made after that. For example: `dnssrvnoa+memcached.namespace.svc.cluster.local`
diff --git a/docs/operations/query-auditor.md b/docs/operations/query-auditor.md
@@ -1,7 +1,7 @@
 ---
 title: "Query Auditor (tool)"
 linkTitle: "Query Auditor (tool)"
-weight: 2
+weight: 3
 slug: query-auditor
 ---
 

diff --git a/docs/operations/query-tee.md b/docs/operations/query-tee.md
@@ -1,7 +1,7 @@
 ---
 title: "Query Tee (service)"
 linkTitle: "Query Tee (service)"
-weight: 3
+weight: 4
 slug: query-tee
 ---
 

diff --git a/integration/e2e/cache/cache.go b/integration/e2e/cache/cache.go
@@ -0,0 +1,21 @@
+package e2ecache
+
+import (
+	"github.com/cortexproject/cortex/integration/e2e"
+)
+
+const (
+	MemcachedPort = 11211 // TCP port NewMemcached passes to the service and to its readiness probe.
+)
+
+func NewMemcached() *e2e.ConcreteService {
+	return e2e.NewConcreteService(
+		"memcached",
+		// If you change the image tag, remember to update it in the preloading done
+		// by CircleCI too (see .circleci/config.yml).
+		"memcached:1.6.1",
+		nil,
+		e2e.NewTCPReadinessProbe(MemcachedPort),
+		MemcachedPort,
+	)
+}
diff --git a/integration/e2e/service.go b/integration/e2e/service.go
@@ -6,6 +6,7 @@ import (
 	"fmt"
 	"io/ioutil"
 	"math"
+	"net"
 	"os/exec"
 	"regexp"
 	"strconv"
@@ -249,7 +250,12 @@ func (s *ConcreteService) WaitStarted() (err error) {
 	}
 
 	for s.retryBackoff.Reset(); s.retryBackoff.Ongoing(); {
-		err = exec.Command("docker", "inspect", s.containerName()).Run()
+		// Enforce a timeout on the command execution because we've seen some flaky tests
+		// stuck here.
+		ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+		defer cancel()
+
+		err = exec.CommandContext(ctx, "docker", "inspect", s.containerName()).Run()
 		if err == nil {
 			return nil
 		}
@@ -390,6 +396,33 @@ func (p *HTTPReadinessProbe) Ready(service *ConcreteService) (err error) {
 	return fmt.Errorf("got no expected status code: %v, expected: %v", res.StatusCode, p.expectedStatus)
 }
 
+// TCPReadinessProbe checks readiness by ensuring a TCP connection can be established.
+type TCPReadinessProbe struct {
+	port int // port passed to the service's Endpoint lookup to obtain the address to dial
+}
+
+// NewTCPReadinessProbe returns a TCPReadinessProbe bound to the given port.
+func NewTCPReadinessProbe(port int) *TCPReadinessProbe {
+	probe := &TCPReadinessProbe{port: port}
+	return probe
+}
+
+// Ready dials the service endpoint for the probed port (1s timeout) and returns any error.
+func (p *TCPReadinessProbe) Ready(service *ConcreteService) (err error) {
+	addr := service.Endpoint(p.port)
+	switch addr {
+	case "":
+		return fmt.Errorf("cannot get service endpoint for port %d", p.port)
+	case "stopped":
+		return errors.New("service has stopped")
+	}
+	c, err := net.DialTimeout("tcp", addr, time.Second)
+	if err != nil {
+		return err
+	}
+	return c.Close()
+}
+
 // CmdReadinessProbe checks readiness by `Exec`ing a command (within container) which returns 0 to consider status being ready
 type CmdReadinessProbe struct {
 	cmd *Command