Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* `cortex_querier_bucket_store_blocks_meta_sync_duration_seconds` > `cortex_querier_blocks_meta_sync_duration_seconds`
* `cortex_querier_bucket_store_blocks_meta_sync_consistency_delay_seconds` > `cortex_querier_blocks_meta_sync_consistency_delay_seconds`
* [CHANGE] Experimental TSDB: Modified default values for `compactor.deletion-delay` option from 48h to 12h and `-experimental.tsdb.bucket-store.ignore-deletion-marks-delay` from 24h to 6h. #2414
* [FEATURE] Ruler: The `-ruler.evaluation-delay` flag was added to allow users to configure a default evaluation delay for all rules in cortex. The default value is 0 which is the current behavior. #2423
* [ENHANCEMENT] Experimental TSDB: sample ingestion errors are now reported via existing `cortex_discarded_samples_total` metric. #2370
* [ENHANCEMENT] Failures on samples at distributors and ingesters return the first validation error as opposed to the last. #2383
* [ENHANCEMENT] Experimental TSDB: Added `cortex_querier_blocks_meta_synced`, which reflects current state of synced blocks over all tenants. #2392
Expand Down
5 changes: 5 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -700,6 +700,11 @@ The `ruler_config` configures the Cortex ruler.
# CLI flag: -ruler.evaluation-interval
[evaluation_interval: <duration> | default = 1m0s]

# Set interval to delay the evaluation of rules to ensure the underlying
# metrics have been pushed to Cortex.
# CLI flag: -ruler.evaluation-delay
[evaluation_delay: <duration> | default = 0s]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

evaluation_delay_time/evaluation_delay_duration?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

evaluation_delay_duration makes sense


# How frequently to poll for rule changes
# CLI flag: -ruler.poll-interval
[poll_interval: <duration> | default = 1m0s]
Expand Down
33 changes: 33 additions & 0 deletions pkg/ruler/compat.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,12 @@ package ruler

import (
"context"
"time"

"github.com/pkg/errors"
"github.com/prometheus/prometheus/pkg/labels"
"github.com/prometheus/prometheus/promql"
"github.com/prometheus/prometheus/rules"
"github.com/prometheus/prometheus/storage"
"github.com/weaveworks/common/user"

Expand Down Expand Up @@ -78,3 +82,32 @@ func (t *tsdb) StartTime() (int64, error) {
// Close is a no-op that always returns a nil error.
func (t *tsdb) Close() error {
return nil
}

// engineQueryFunc returns a new query function that executes instant queries against
// the given engine, after subtracting the provided delay from the instant query timestamp.
// It converts scalar into vector results.
// Based on https://github.com/prometheus/prometheus/blob/ecda6013edf58bf645c6661b9f78ccce03b1f315/rules/manager.go#L162-L187
func engineQueryFunc(engine *promql.Engine, q storage.Queryable, delay time.Duration) rules.QueryFunc {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This whole function is a copy of promRules.EngineQueryFunc() just to add t = t.Add(-delay) as first thing. Why don't we simply wrap promRules.EngineQueryFunc() instead of duplicating it? Something like (pseudo-code):

func engineQueryFunc(engine *promql.Engine, q storage.Queryable, delay time.Duration) rules.QueryFunc {
orig := promRules.EngineQueryFunc(engine, queryable)

return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
  return orig(ctx, qs, t.Add(-delay))
}
}

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will do.

// The returned closure runs qs as an instant query against engine and
// returns the result as a vector, or an error if the query cannot be
// created, fails during execution, or yields an unsupported result type.
return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) {
// Shift the evaluation timestamp back by the configured delay.
t = t.Add(-delay)
// The right-hand q is the outer Queryable; the newly declared q (the
// instant query) shadows it for the rest of the closure.
q, err := engine.NewInstantQuery(q, qs, t)
if err != nil {
return nil, err
}
res := q.Exec(ctx)
if res.Err != nil {
return nil, res.Err
}
switch v := res.Value.(type) {
case promql.Vector:
return v, nil
case promql.Scalar:
// Wrap the scalar in a single-sample vector with an empty label set.
return promql.Vector{promql.Sample{
Point: promql.Point(v),
Metric: labels.Labels{},
}}, nil
default:
// Any other result type is rejected.
return nil, errors.New("rule result is not a vector or scalar")
}
}
}
6 changes: 5 additions & 1 deletion pkg/ruler/ruler.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ type Config struct {
ExternalURL flagext.URLValue `yaml:"external_url"`
// How frequently to evaluate rules by default.
EvaluationInterval time.Duration `yaml:"evaluation_interval"`
// Delay the evaluation of all rules by a set interval to give a buffer
// to metrics that haven't been forwarded to Cortex yet.
EvaluationDelay time.Duration `yaml:"evaluation_delay"`
// How frequently to poll for updated rules.
PollInterval time.Duration `yaml:"poll_interval"`
// Rule Storage and Polling configuration.
Expand Down Expand Up @@ -103,6 +106,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
cfg.ExternalURL.URL, _ = url.Parse("") // Must be non-nil
f.Var(&cfg.ExternalURL, "ruler.external.url", "URL of alerts return path.")
f.DurationVar(&cfg.EvaluationInterval, "ruler.evaluation-interval", 1*time.Minute, "How frequently to evaluate rules")
f.DurationVar(&cfg.EvaluationDelay, "ruler.evaluation-delay", 0, "Set interval to delay the evaluation of rules to ensure they underlying metrics have been pushed to cortex. Default ")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
f.DurationVar(&cfg.EvaluationDelay, "ruler.evaluation-delay", 0, "Set interval to delay the evaluation of rules to ensure they underlying metrics have been pushed to cortex. Default ")
f.DurationVar(&cfg.EvaluationDelay, "ruler.evaluation-delay", 0, "Set interval to delay the evaluation of rules to ensure they underlying metrics have been pushed to cortex. ")

f.DurationVar(&cfg.PollInterval, "ruler.poll-interval", 1*time.Minute, "How frequently to poll for rule changes")
f.Var(&cfg.AlertmanagerURL, "ruler.alertmanager-url", "URL of the Alertmanager to send notifications to.")
f.BoolVar(&cfg.AlertmanagerDiscovery, "ruler.alertmanager-discovery", false, "Use DNS SRV records to discover alertmanager hosts.")
Expand Down Expand Up @@ -480,7 +484,7 @@ func (r *Ruler) newManager(ctx context.Context, userID string) (*promRules.Manag
opts := &promRules.ManagerOptions{
Appendable: tsdb,
TSDB: tsdb,
QueryFunc: promRules.EngineQueryFunc(r.engine, r.queryable),
QueryFunc: engineQueryFunc(r.engine, r.queryable, r.cfg.EvaluationDelay),
Context: user.InjectOrgID(ctx, userID),
ExternalURL: r.alertURL,
NotifyFunc: sendAlerts(notifier, r.alertURL.String()),
Expand Down