Skip to content

Commit c51fc02

Browse files
committed
Exposing parquet configs
Signed-off-by: alanprot <[email protected]>
1 parent bc722c1 commit c51fc02

File tree

5 files changed

+153
-10
lines changed

5 files changed

+153
-10
lines changed

docs/blocks-storage/querier.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,22 @@ querier:
278278
# [Experimental] If true, experimental promQL functions are enabled.
279279
# CLI flag: -querier.enable-promql-experimental-functions
280280
[enable_promql_experimental_functions: <boolean> | default = false]
281+
282+
# [Experimental] If true, querier will try to query the parquet files if
283+
# available.
284+
# CLI flag: -querier.enable-parquet-queryable
285+
[enable_parquet_queryable: <boolean> | default = false]
286+
287+
# [Experimental] Maximum size of the Parquet queryable shard cache. 0 to
288+
# disable.
289+
# CLI flag: -querier.parquet-queryable-shard-cache-size
290+
[parquet_queryable_shard_cache_size: <int> | default = 512]
291+
292+
# [Experimental] Parquet queryable's default block store to query. Valid
293+
# options are tsdb and parquet. If it is set to tsdb, parquet queryable always
294+
# falls back to store gateway.
295+
# CLI flag: -querier.parquet-queryable-default-block-store
296+
[parquet_queryable_default_block_store: <string> | default = "parquet"]
281297
```
282298
283299
### `blocks_storage_config`

docs/configuration/config-file-reference.md

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,110 @@ api:
162162
# The compactor_config configures the compactor for the blocks storage.
163163
[compactor: <compactor_config>]
164164

165+
parquet_converter:
166+
# Maximum concurrent goroutines for downloading block metadata from object
167+
# storage.
168+
# CLI flag: -parquet-converter.meta-sync-concurrency
169+
[meta_sync_concurrency: <int> | default = 20]
170+
171+
# How often to check for new TSDB blocks to convert to parquet format.
172+
# CLI flag: -parquet-converter.conversion-interval
173+
[conversion_interval: <duration> | default = 1m]
174+
175+
# Maximum number of time series per parquet row group. Larger values improve
176+
# compression but may reduce performance during reads.
177+
# CLI flag: -parquet-converter.max-rows-per-row-group
178+
[max_rows_per_row_group: <int> | default = 1000000]
179+
180+
# Enable disk-based write buffering to reduce memory consumption during
181+
# parquet file generation.
182+
# CLI flag: -parquet-converter.file-buffer-enabled
183+
[file_buffer_enabled: <boolean> | default = true]
184+
185+
# Local directory path for caching TSDB blocks during parquet conversion.
186+
# CLI flag: -parquet-converter.data-dir
187+
[data_dir: <string> | default = "./data"]
188+
189+
ring:
190+
kvstore:
191+
# Backend storage to use for the ring. Supported values are: consul, etcd,
192+
# inmemory, memberlist, multi.
193+
# CLI flag: -parquet-converter.ring.store
194+
[store: <string> | default = "consul"]
195+
196+
# The prefix for the keys in the store. Should end with a /.
197+
# CLI flag: -parquet-converter.ring.prefix
198+
[prefix: <string> | default = "collectors/"]
199+
200+
dynamodb:
201+
# Region to access dynamodb.
202+
# CLI flag: -parquet-converter.ring.dynamodb.region
203+
[region: <string> | default = ""]
204+
205+
# Table name to use on dynamodb.
206+
# CLI flag: -parquet-converter.ring.dynamodb.table-name
207+
[table_name: <string> | default = ""]
208+
209+
# Time to expire items on dynamodb.
210+
# CLI flag: -parquet-converter.ring.dynamodb.ttl-time
211+
[ttl: <duration> | default = 0s]
212+
213+
# Time to refresh local ring with information on dynamodb.
214+
# CLI flag: -parquet-converter.ring.dynamodb.puller-sync-time
215+
[puller_sync_time: <duration> | default = 1m]
216+
217+
# Maximum number of retries for DDB KV CAS.
218+
# CLI flag: -parquet-converter.ring.dynamodb.max-cas-retries
219+
[max_cas_retries: <int> | default = 10]
220+
221+
# Timeout of dynamoDbClient requests. Default is 2m.
222+
# CLI flag: -parquet-converter.ring.dynamodb.timeout
223+
[timeout: <duration> | default = 2m]
224+
225+
# The consul_config configures the consul client.
226+
# The CLI flags prefix for this block config is: parquet-converter.ring
227+
[consul: <consul_config>]
228+
229+
# The etcd_config configures the etcd client.
230+
# The CLI flags prefix for this block config is: parquet-converter.ring
231+
[etcd: <etcd_config>]
232+
233+
multi:
234+
# Primary backend storage used by multi-client.
235+
# CLI flag: -parquet-converter.ring.multi.primary
236+
[primary: <string> | default = ""]
237+
238+
# Secondary backend storage used by multi-client.
239+
# CLI flag: -parquet-converter.ring.multi.secondary
240+
[secondary: <string> | default = ""]
241+
242+
# Mirror writes to secondary store.
243+
# CLI flag: -parquet-converter.ring.multi.mirror-enabled
244+
[mirror_enabled: <boolean> | default = false]
245+
246+
# Timeout for storing value to secondary store.
247+
# CLI flag: -parquet-converter.ring.multi.mirror-timeout
248+
[mirror_timeout: <duration> | default = 2s]
249+
250+
# Period at which to heartbeat to the ring. 0 = disabled.
251+
# CLI flag: -parquet-converter.ring.heartbeat-period
252+
[heartbeat_period: <duration> | default = 5s]
253+
254+
# The heartbeat timeout after which parquet-converters are considered
255+
# unhealthy within the ring. 0 = never (timeout disabled).
256+
# CLI flag: -parquet-converter.ring.heartbeat-timeout
257+
[heartbeat_timeout: <duration> | default = 1m]
258+
259+
# Time since last heartbeat before parquet-converter will be removed from
260+
# ring. 0 to disable
261+
# CLI flag: -parquet-converter.auto-forget-delay
262+
[auto_forget_delay: <duration> | default = 2m]
263+
264+
# File path where tokens are stored. If empty, tokens are not stored at
265+
# shutdown and restored at startup.
266+
# CLI flag: -parquet-converter.ring.tokens-file-path
267+
[tokens_file_path: <string> | default = ""]
268+
165269
# The store_gateway_config configures the store-gateway service used by the
166270
# blocks storage.
167271
[store_gateway: <store_gateway_config>]
@@ -2573,6 +2677,7 @@ The `consul_config` configures the consul client. The supported CLI flags `<pref
25732677
- `compactor.ring`
25742678
- `distributor.ha-tracker`
25752679
- `distributor.ring`
2680+
- `parquet-converter.ring`
25762681
- `ruler.ring`
25772682
- `store-gateway.sharding-ring`
25782683

@@ -2894,6 +2999,7 @@ The `etcd_config` configures the etcd client. The supported CLI flags `<prefix>`
28942999
- `compactor.ring`
28953000
- `distributor.ha-tracker`
28963001
- `distributor.ring`
3002+
- `parquet-converter.ring`
28973003
- `ruler.ring`
28983004
- `store-gateway.sharding-ring`
28993005

@@ -4328,6 +4434,22 @@ thanos_engine:
43284434
# [Experimental] If true, experimental promQL functions are enabled.
43294435
# CLI flag: -querier.enable-promql-experimental-functions
43304436
[enable_promql_experimental_functions: <boolean> | default = false]
4437+
4438+
# [Experimental] If true, querier will try to query the parquet files if
4439+
# available.
4440+
# CLI flag: -querier.enable-parquet-queryable
4441+
[enable_parquet_queryable: <boolean> | default = false]
4442+
4443+
# [Experimental] Maximum size of the Parquet queryable shard cache. 0 to
4444+
# disable.
4445+
# CLI flag: -querier.parquet-queryable-shard-cache-size
4446+
[parquet_queryable_shard_cache_size: <int> | default = 512]
4447+
4448+
# [Experimental] Parquet queryable's default block store to query. Valid options
4449+
# are tsdb and parquet. If it is set to tsdb, parquet queryable always falls back
4450+
# to store gateway.
4451+
# CLI flag: -querier.parquet-queryable-default-block-store
4452+
[parquet_queryable_default_block_store: <string> | default = "parquet"]
43314453
```
43324454
43334455
### `query_frontend_config`

pkg/cortex/cortex.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ type Config struct {
114114
QueryRange queryrange.Config `yaml:"query_range"`
115115
BlocksStorage tsdb.BlocksStorageConfig `yaml:"blocks_storage"`
116116
Compactor compactor.Config `yaml:"compactor"`
117-
ParquetConverter parquetconverter.Config `yaml:"parquet_converter" doc:"hidden"`
117+
ParquetConverter parquetconverter.Config `yaml:"parquet_converter"`
118118
StoreGateway storegateway.Config `yaml:"store_gateway"`
119119
TenantFederation tenantfederation.Config `yaml:"tenant_federation"`
120120

pkg/parquetconverter/converter.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,11 @@ type Converter struct {
104104
func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
105105
cfg.Ring.RegisterFlags(f)
106106

107-
f.StringVar(&cfg.DataDir, "parquet-converter.data-dir", "./data", "Data directory in which to cache blocks and process conversions.")
108-
f.IntVar(&cfg.MetaSyncConcurrency, "parquet-converter.meta-sync-concurrency", 20, "Number of Go routines to use when syncing block meta files from the long term storage.")
109-
f.IntVar(&cfg.MaxRowsPerRowGroup, "parquet-converter.max-rows-per-row-group", 1e6, "Max number of rows per parquet row group.")
110-
f.DurationVar(&cfg.ConversionInterval, "parquet-converter.conversion-interval", time.Minute, "The frequency at which the conversion job runs.")
111-
f.BoolVar(&cfg.FileBufferEnabled, "parquet-converter.file-buffer-enabled", true, "Whether to enable buffering the writes in disk to reduce memory utilization.")
107+
f.StringVar(&cfg.DataDir, "parquet-converter.data-dir", "./data", "Local directory path for caching TSDB blocks during parquet conversion.")
108+
f.IntVar(&cfg.MetaSyncConcurrency, "parquet-converter.meta-sync-concurrency", 20, "Maximum concurrent goroutines for downloading block metadata from object storage.")
109+
f.IntVar(&cfg.MaxRowsPerRowGroup, "parquet-converter.max-rows-per-row-group", 1e6, "Maximum number of time series per parquet row group. Larger values improve compression but may reduce performance during reads.")
110+
f.DurationVar(&cfg.ConversionInterval, "parquet-converter.conversion-interval", time.Minute, "How often to check for new TSDB blocks to convert to parquet format.")
111+
f.BoolVar(&cfg.FileBufferEnabled, "parquet-converter.file-buffer-enabled", true, "Enable disk-based write buffering to reduce memory consumption during parquet file generation.")
112112
}
113113

114114
func NewConverter(cfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, blockRanges []int64, logger log.Logger, registerer prometheus.Registerer, limits *validation.Overrides) (*Converter, error) {

pkg/querier/querier.go

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,10 @@ type Config struct {
9292
EnablePromQLExperimentalFunctions bool `yaml:"enable_promql_experimental_functions"`
9393

9494
// Query Parquet files if available
95-
EnableParquetQueryable bool `yaml:"enable_parquet_queryable" doc:"hidden"`
96-
ParquetQueryableShardCacheSize int `yaml:"parquet_queryable_shard_cache_size" doc:"hidden"`
97-
ParquetQueryableDefaultBlockStore string `yaml:"parquet_queryable_default_block_store" doc:"hidden"`
98-
ParquetQueryableFallbackDisabled bool `yaml:"parquet_queryable_fallback_disabled" doc:"hidden"`
95+
EnableParquetQueryable bool `yaml:"enable_parquet_queryable"`
96+
ParquetQueryableShardCacheSize int `yaml:"parquet_queryable_shard_cache_size"`
97+
ParquetQueryableDefaultBlockStore string `yaml:"parquet_queryable_default_block_store"`
98+
ParquetQueryableFallbackDisabled bool `yaml:"parquet_queryable_fallback_disabled"`
9999
}
100100

101101
var (
@@ -144,9 +144,14 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
144144
f.BoolVar(&cfg.IgnoreMaxQueryLength, "querier.ignore-max-query-length", false, "If enabled, ignore max query length check at Querier select method. Users can choose to ignore it since the validation can be done before Querier evaluation like at Query Frontend or Ruler.")
145145
f.BoolVar(&cfg.EnablePromQLExperimentalFunctions, "querier.enable-promql-experimental-functions", false, "[Experimental] If true, experimental promQL functions are enabled.")
146146
f.BoolVar(&cfg.EnableParquetQueryable, "querier.enable-parquet-queryable", false, "[Experimental] If true, querier will try to query the parquet files if available.")
147148
f.IntVar(&cfg.ParquetQueryableShardCacheSize, "querier.parquet-queryable-shard-cache-size", 512, "[Experimental] Maximum size of the Parquet queryable shard cache. 0 to disable.")
148149
f.StringVar(&cfg.ParquetQueryableDefaultBlockStore, "querier.parquet-queryable-default-block-store", string(parquetBlockStore), "[Experimental] Parquet queryable's default block store to query. Valid options are tsdb and parquet. If it is set to tsdb, parquet queryable always fallback to store gateway.")
149150
f.BoolVar(&cfg.ParquetQueryableFallbackDisabled, "querier.parquet-queryable-fallback-disabled", false, "[Experimental] Disable Parquet queryable to fallback queries to Store Gateway if the block is not available as Parquet files but available in TSDB. Setting this to true will disable the fallback and users can remove Store Gateway. But need to make sure Parquet files are created before it is queryable.")
150155
}
151156

152157
// Validate the config

0 commit comments

Comments
 (0)