diff --git a/CHANGELOG.md b/CHANGELOG.md
index 687ebe39aef..a0e76e1280c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,16 @@
 
 ## master / unreleased
 
+* [FEATURE] Fan out parallelizable queries to backend queriers concurrently.
+  * `-querier.sum-shards` (bool)
+  * Requires a shard-compatible schema (v10+)
+  * Query sharding increases the number of traces generated accordingly.
+  * The query-frontend now requires a schema config to determine how/when to shard queries, either from a file or from flags (e.g. via the `config-yaml` CLI flag). This is the same schema config the queriers consume.
+  * It's also advised to increase downstream concurrency controls:
+    * `querier.max-outstanding-requests-per-tenant`
+    * `querier.max-query-parallelism`
+    * `querier.max-concurrent`
+    * `server.grpc-max-concurrent-streams` (for both query-frontends and queriers)
 * [BUGFIX] Fixed unnecessary CAS operations done by the HA tracker when the jitter is enabled. #1861
 
 ## 0.4.0 / 2019-12-02
diff --git a/docs/arguments.md b/docs/arguments.md
index a0dd1feabf1..92a566a9084 100644
--- a/docs/arguments.md
+++ b/docs/arguments.md
@@ -63,6 +63,28 @@ The ingester query API was improved over time, but defaults to the old behaviour
 
 ## Query Frontend
 
+- `-querier.sum-shards`
+
+  If set to true, will cause the query frontend to mutate incoming queries, when possible, by turning `sum` operations into sharded `sum` operations. This requires a shard-compatible schema (v10+). An abridged example:
+  `sum by (foo) (rate(bar{baz="blip"}[1m]))` ->
+  ```
+  sum by (foo) (
+    sum by (foo) (rate(bar{baz="blip",__cortex_shard__="0of16"}[1m])) or
+    sum by (foo) (rate(bar{baz="blip",__cortex_shard__="1of16"}[1m])) or
+    ...
+    sum by (foo) (rate(bar{baz="blip",__cortex_shard__="15of16"}[1m]))
+  )
+  ```
+  When enabled, the query-frontend requires a schema config to determine how/when to shard queries, either from a file or from flags (e.g. via the `config-yaml` CLI flag). This is the same schema config the queriers consume.
+  It's also advised to increase downstream concurrency controls to account for more queries of smaller sizes:
+
+  - `querier.max-outstanding-requests-per-tenant`
+  - `querier.max-query-parallelism`
+  - `querier.max-concurrent`
+  - `server.grpc-max-concurrent-streams` (for both query-frontends and queriers)
+
+  Instrumentation (traces) also scales with the number of sharded queries, so it's suggested to account for the increased volume there as well.
+
 - `-querier.align-querier-with-step`
 
   If set to true, will cause the query frontend to mutate incoming queries and align their start and end parameters to the step parameter of the query. This improves the cacheability of the query results.
diff --git a/go.mod b/go.mod
index 755d4d040d1..e376e7a3142 100644
--- a/go.mod
+++ b/go.mod
@@ -79,6 +79,7 @@ require (
 	google.golang.org/api v0.11.0
 	google.golang.org/grpc v1.25.1
 	gopkg.in/yaml.v2 v2.2.2
+	sigs.k8s.io/yaml v1.1.0
 )
 
 // Override since git.apache.org is down. The docs say to fetch from github.
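Review note: the rewrite described in the new `docs/arguments.md` section is, conceptually, a per-shard fan-out of the inner aggregation followed by an outer re-aggregation. Below is a minimal, self-contained sketch of that expansion; it is not the actual frontend code (which rewrites the PromQL AST via the `astmapper` package), and the function name, shard count, and matchers are illustrative only.

```go
package main

import (
	"fmt"
	"strings"
)

// shardedSumRate builds the sharded form of
//   sum by (<groupBy>) (rate(<metric>{<matchers>}[<window>]))
// producing one leg per shard constrained by a __cortex_shard__ matcher and
// re-aggregating the legs with an outer sum, as in the docs example.
func shardedSumRate(groupBy, metric, matchers, window string, shards int) string {
	legs := make([]string, 0, shards)
	for i := 0; i < shards; i++ {
		legs = append(legs, fmt.Sprintf(
			`  sum by (%s) (rate(%s{%s,__cortex_shard__="%dof%d"}[%s]))`,
			groupBy, metric, matchers, i, shards, window))
	}
	return fmt.Sprintf("sum by (%s) (\n%s\n)", groupBy, strings.Join(legs, " or\n"))
}

func main() {
	// Reproduces the abridged example: sum by (foo) (rate(bar{baz="blip"}[1m])) with 16 shards.
	fmt.Println(shardedSumRate("foo", "bar", `baz="blip"`, "1m", 16))
}
```

Each generated leg can be dispatched to a downstream querier independently, which is why the concurrency limits listed above become more important when sharding is enabled.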
diff --git a/pkg/chunk/chunk_store.go b/pkg/chunk/chunk_store.go
index da7e6eec604..9ab01275b53 100644
--- a/pkg/chunk/chunk_store.go
+++ b/pkg/chunk/chunk_store.go
@@ -436,6 +436,9 @@ func (c *store) lookupChunksByMetricName(ctx context.Context, userID string, fro
 }
 
 func (c *store) lookupEntriesByQueries(ctx context.Context, queries []IndexQuery) ([]IndexEntry, error) {
+	log, ctx := spanlogger.New(ctx, "store.lookupEntriesByQueries")
+	defer log.Span.Finish()
+
 	var lock sync.Mutex
 	var entries []IndexEntry
 	err := c.index.QueryPages(ctx, queries, func(query IndexQuery, resp ReadBatch) bool {
@@ -459,6 +462,9 @@ func (c *store) lookupEntriesByQueries(ctx context.Context, queries []IndexQuery
 }
 
 func (c *store) parseIndexEntries(ctx context.Context, entries []IndexEntry, matcher *labels.Matcher) ([]string, error) {
+	log, ctx := spanlogger.New(ctx, "store.parseIndexEntries")
+	defer log.Span.Finish()
+
 	result := make([]string, 0, len(entries))
 	for _, entry := range entries {
 		chunkKey, labelValue, _, err := parseChunkTimeRangeValue(entry.RangeValue, entry.Value)
diff --git a/pkg/chunk/chunk_store_test.go b/pkg/chunk/chunk_store_test.go
index 80632775a26..f4be5ab5f4a 100644
--- a/pkg/chunk/chunk_store_test.go
+++ b/pkg/chunk/chunk_store_test.go
@@ -77,6 +77,8 @@ func newTestChunkStoreConfig(t require.TestingT, schemaName string, storeCfg Sto
 		tbmConfig TableManagerConfig
 		schemaCfg = DefaultSchemaConfig("", schemaName, 0)
 	)
+	err := schemaCfg.Validate()
+	require.NoError(t, err)
 	flagext.DefaultValues(&tbmConfig)
 	storage := NewMockStorage()
 	tableManager, err := NewTableManager(tbmConfig, schemaCfg, maxChunkAge, storage, nil)
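Review note: the spans added to `lookupEntriesByQueries` and `parseIndexEntries` follow the existing spanlogger pattern: `spanlogger.New` starts a child span named after the call site and returns a context carrying it, and the deferred `Finish` closes the span when the lookup returns. A rough stand-alone sketch of the same pattern using opentracing-go directly; the operation name and log fields below are illustrative, not taken from the codebase.

```go
package main

import (
	"context"

	opentracing "github.com/opentracing/opentracing-go"
)

// lookupWithSpan shows the pattern applied in the diff: derive a child span
// from the caller's context, pass the new context to downstream calls so they
// join the same trace, and finish the span on return.
func lookupWithSpan(ctx context.Context) error {
	span, ctx := opentracing.StartSpanFromContext(ctx, "store.lookupEntriesByQueries")
	defer span.Finish()

	span.LogKV("msg", "querying index pages")
	// ... perform the index lookup using ctx so downstream calls share the trace ...
	_ = ctx
	return nil
}

func main() {
	_ = lookupWithSpan(context.Background())
}
```

Each sharded sub-query walks these paths independently, which is one reason trace volume grows when sharding is enabled.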
diff --git a/pkg/chunk/chunk_store_utils.go b/pkg/chunk/chunk_store_utils.go
index 856af86be18..f5511293696 100644
--- a/pkg/chunk/chunk_store_utils.go
+++ b/pkg/chunk/chunk_store_utils.go
@@ -10,6 +10,7 @@ import (
 	"github.com/prometheus/prometheus/promql"
 
 	"github.com/cortexproject/cortex/pkg/chunk/cache"
+	"github.com/cortexproject/cortex/pkg/querier/astmapper"
 	"github.com/cortexproject/cortex/pkg/util"
 	"github.com/cortexproject/cortex/pkg/util/spanlogger"
 )
@@ -146,13 +147,13 @@ func (c *Fetcher) worker() {
 
 // FetchChunks fetches a set of chunks from cache and store. Note that the keys passed in must be
 // lexicographically sorted, while the returned chunks are not in the same order as the passed in chunks.
 func (c *Fetcher) FetchChunks(ctx context.Context, chunks []Chunk, keys []string) ([]Chunk, error) {
-	log, ctx := spanlogger.New(ctx, "ChunkStore.fetchChunks")
+	log, ctx := spanlogger.New(ctx, "ChunkStore.FetchChunks")
 	defer log.Span.Finish()
 
 	// Now fetch the actual chunk data from Memcache / S3
 	cacheHits, cacheBufs, _ := c.cache.Fetch(ctx, keys)
 
-	fromCache, missing, err := c.processCacheResponse(chunks, cacheHits, cacheBufs)
+	fromCache, missing, err := c.processCacheResponse(ctx, chunks, cacheHits, cacheBufs)
 	if err != nil {
 		level.Warn(log).Log("msg", "error fetching from cache", "err", err)
 	}
@@ -199,12 +200,14 @@ func (c *Fetcher) writeBackCache(ctx context.Context, chunks []Chunk) error {
 
 // ProcessCacheResponse decodes the chunks coming back from the cache, separating
 // hits and misses.
-func (c *Fetcher) processCacheResponse(chunks []Chunk, keys []string, bufs [][]byte) ([]Chunk, []Chunk, error) {
+func (c *Fetcher) processCacheResponse(ctx context.Context, chunks []Chunk, keys []string, bufs [][]byte) ([]Chunk, []Chunk, error) {
 	var (
 		requests  = make([]decodeRequest, 0, len(keys))
 		responses = make(chan decodeResponse)
 		missing   []Chunk
 	)
+	log, ctx := spanlogger.New(ctx, "Fetcher.processCacheResponse")
+	defer log.Span.Finish()
 
 	i, j := 0, 0
 	for i < len(chunks) && j < len(keys) {
@@ -229,6 +232,7 @@
 	for ; i < len(chunks); i++ {
 		missing = append(missing, chunks[i])
 	}
+	level.Debug(log).Log("chunks", len(chunks), "decodeRequests", len(requests), "missing", len(missing))
 
 	go func() {
 		for _, request := range requests {
@@ -252,3 +256,10 @@
 	}
 	return found, missing, err
 }
+
+func injectShardLabels(chunks []Chunk, shard astmapper.ShardAnnotation) {
+	for i, chunk := range chunks {
+		chunk.Metric = append(chunk.Metric, shard.Label())
+		chunks[i] = chunk
+	}
+}
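Review note: `injectShardLabels`, added at the end of the file above, appends the shard's label to each chunk's metric, so series fetched for a sharded sub-query keep the `__cortex_shard__` label they were selected with. Below is a minimal sketch of the same idea using plain Prometheus labels; the label name comes from the docs example, and `ShardAnnotation.Label()` is assumed to yield an equivalent name/value pair.

```go
package main

import (
	"fmt"

	"github.com/prometheus/prometheus/pkg/labels"
)

// withShardLabel returns a copy of the series labels with the shard label
// appended (appended at the end, mirroring the diff, rather than re-sorted).
func withShardLabel(metric labels.Labels, shard, of int) labels.Labels {
	out := make(labels.Labels, 0, len(metric)+1)
	out = append(out, metric...)
	out = append(out, labels.Label{
		Name:  "__cortex_shard__",
		Value: fmt.Sprintf("%dof%d", shard, of),
	})
	return out
}

func main() {
	m := labels.FromStrings("__name__", "bar", "baz", "blip")
	fmt.Println(withShardLabel(m, 0, 16))
}
```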
diff --git a/pkg/chunk/schema.go b/pkg/chunk/schema.go
index ce69abf8ed7..111373c5a0d 100644
--- a/pkg/chunk/schema.go
+++ b/pkg/chunk/schema.go
@@ -7,7 +7,11 @@
 import (
 	"fmt"
 	"strings"
+	"strconv"
+	jsoniter "github.com/json-iterator/go"
+
+	"github.com/cortexproject/cortex/pkg/querier/astmapper"
 
 	"github.com/prometheus/common/model"
 	"github.com/prometheus/prometheus/pkg/labels"
 )
@@ -46,6 +50,7 @@ type Schema interface {
 	GetReadQueriesForMetric(from, through model.Time, userID string, metricName string) ([]IndexQuery, error)
 	GetReadQueriesForMetricLabel(from, through model.Time, userID string, metricName string, labelName string) ([]IndexQuery, error)
 	GetReadQueriesForMetricLabelValue(from, through model.Time, userID string, metricName string, labelName string, labelValue string) ([]IndexQuery, error)
+	FilterReadQueries(queries []IndexQuery, shard *astmapper.ShardAnnotation) []IndexQuery
 
 	// If the query resulted in series IDs, use this method to find chunks.
 	GetChunksForSeries(from, through model.Time, userID string, seriesID []byte) ([]IndexQuery, error)
@@ -114,7 +119,7 @@ func (s schema) GetCacheKeysAndLabelWriteEntries(from, through model.Time, userI
 	key := strings.Join([]string{
 		bucket.tableName,
 		bucket.hashKey,
-		string(labelsSeriesID(labels)),
+		string(LabelsSeriesID(labels)),
 	},
 		"-",
 	)
@@ -216,6 +221,10 @@ func (s schema) GetLabelNamesForSeries(from, through model.Time, userID string,
 	return result, nil
 }
 
+func (s schema) FilterReadQueries(queries []IndexQuery, shard *astmapper.ShardAnnotation) []IndexQuery {
+	return s.entries.FilterReadQueries(queries, shard)
+}
+
 type entries interface {
 	GetWriteEntries(bucket Bucket, metricName string, labels labels.Labels, chunkID string) ([]IndexEntry, error)
 	GetLabelWriteEntries(bucket Bucket, metricName string, labels labels.Labels, chunkID string) ([]IndexEntry, error)
@@ -226,13 +235,23 @@ type entries interface {
 	GetReadMetricLabelValueQueries(bucket Bucket, metricName string, labelName string, labelValue string) ([]IndexQuery, error)
 	GetChunksForSeries(bucket Bucket, seriesID []byte) ([]IndexQuery, error)
 	GetLabelNamesForSeries(bucket Bucket, seriesID []byte) ([]IndexQuery, error)
+	FilterReadQueries(queries []IndexQuery, shard *astmapper.ShardAnnotation) []IndexQuery
+}
+
+// noops is a placeholder which can be embedded to provide default implementations
+type noops struct{}
+
+func (n noops) FilterReadQueries(queries []IndexQuery, shard *astmapper.ShardAnnotation) []IndexQuery {
+	return queries
 }
 
 // original entries:
 // - hash key: <userid>:<bucket>:<metric name>
 // - range key:
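Review note: the new `noops` type uses Go struct embedding to give every `entries` implementation a pass-through `FilterReadQueries`, so only shard-aware schema versions have to override it. Below is a stripped-down sketch of that embedding pattern; all types and the shard-matching rule are invented purely for illustration.

```go
package main

import "fmt"

type indexQuery struct{ rangePrefix string }

type entries interface {
	FilterReadQueries(queries []indexQuery, shard *string) []indexQuery
}

// defaults plays the role of noops: a pass-through that embedders get for free.
type defaults struct{}

func (defaults) FilterReadQueries(queries []indexQuery, _ *string) []indexQuery {
	return queries
}

// legacyEntries (pre-sharding schema) embeds defaults and satisfies the interface unchanged.
type legacyEntries struct{ defaults }

// shardedEntries overrides the default to drop queries outside the requested shard.
type shardedEntries struct{ defaults }

func (shardedEntries) FilterReadQueries(queries []indexQuery, shard *string) []indexQuery {
	if shard == nil {
		return queries
	}
	out := make([]indexQuery, 0, len(queries))
	for _, q := range queries {
		if q.rangePrefix == *shard { // stand-in for the real shard check on index range keys
			out = append(out, q)
		}
	}
	return out
}

func main() {
	qs := []indexQuery{{"0of16"}, {"1of16"}}
	shard := "0of16"
	var legacy entries = legacyEntries{}
	var sharded entries = shardedEntries{}
	fmt.Println(len(legacy.FilterReadQueries(qs, &shard)), len(sharded.FilterReadQueries(qs, &shard)))
}
```

The apparent intent is to keep older (pre-v10) schemas compiling unchanged while letting shard-aware schemas drop index queries that cannot belong to the requested shard.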