@@ -254,8 +254,10 @@ type Ruler struct {
254
254
// Pool of clients used to connect to other ruler replicas.
255
255
clientsPool ClientsPool
256
256
257
- ringCheckErrors prometheus.Counter
258
- rulerSync * prometheus.CounterVec
257
+ ringCheckErrors prometheus.Counter
258
+ rulerSync * prometheus.CounterVec
259
+ ruleGroupStoreLoadDuration prometheus.Gauge
260
+ ruleGroupSyncDuration prometheus.Gauge
259
261
260
262
allowedTenants * util.AllowedTenants
261
263
@@ -288,6 +290,16 @@ func newRuler(cfg Config, manager MultiTenantManager, reg prometheus.Registerer,
288
290
Name : "cortex_ruler_sync_rules_total" ,
289
291
Help : "Total number of times the ruler sync operation triggered." ,
290
292
}, []string {"reason" }),
293
+
294
+ ruleGroupStoreLoadDuration : promauto .With (reg ).NewGauge (prometheus.GaugeOpts {
295
+ Name : "cortex_ruler_rule_group_load_duration_seconds" ,
296
+ Help : "Time taken to load rule groups from storage" ,
297
+ }),
298
+
299
+ ruleGroupSyncDuration : promauto .With (reg ).NewGauge (prometheus.GaugeOpts {
300
+ Name : "cortex_ruler_rule_group_sync_duration_seconds" ,
301
+ Help : "The duration in seconds required to sync and load rule groups from storage." ,
302
+ }),
291
303
}
292
304
293
305
if len (cfg .EnabledTenants ) > 0 {
@@ -512,20 +524,41 @@ func (r *Ruler) run(ctx context.Context) error {
512
524
func (r * Ruler ) syncRules (ctx context.Context , reason string ) {
513
525
level .Debug (r .logger ).Log ("msg" , "syncing rules" , "reason" , reason )
514
526
r .rulerSync .WithLabelValues (reason ).Inc ()
527
+ timer := prometheus .NewTimer (nil )
528
+
529
+ defer func () {
530
+ ruleGroupSyncDuration := timer .ObserveDuration ().Seconds ()
531
+ r .ruleGroupSyncDuration .Set (ruleGroupSyncDuration )
532
+ }()
533
+
534
+ loadedConfigs , err := r .loadRuleGroups (ctx )
535
+ if err != nil {
536
+ return
537
+ }
538
+
539
+ // This will also delete local group files for users that are no longer in 'configs' map.
540
+ r .manager .SyncRuleGroups (ctx , loadedConfigs )
541
+ }
542
+
543
+ func (r * Ruler ) loadRuleGroups (ctx context.Context ) (map [string ]rulespb.RuleGroupList , error ) {
544
+ timer := prometheus .NewTimer (nil )
545
+
546
+ defer func () {
547
+ storeLoadSeconds := timer .ObserveDuration ().Seconds ()
548
+ r .ruleGroupStoreLoadDuration .Set (storeLoadSeconds )
549
+ }()
515
550
516
551
configs , err := r .listRules (ctx )
517
552
if err != nil {
518
553
level .Error (r .logger ).Log ("msg" , "unable to list rules" , "err" , err )
519
- return
554
+ return nil , err
520
555
}
521
556
522
557
loadedConfigs , err := r .store .LoadRuleGroups (ctx , configs )
523
558
if err != nil {
524
559
level .Warn (r .logger ).Log ("msg" , "failed to load some rules owned by this ruler" , "count" , len (configs )- len (loadedConfigs ), "err" , err )
525
560
}
526
-
527
- // This will also delete local group files for users that are no longer in 'configs' map.
528
- r .manager .SyncRuleGroups (ctx , loadedConfigs )
561
+ return loadedConfigs , nil
529
562
}
530
563
531
564
func (r * Ruler ) listRules (ctx context.Context ) (result map [string ]rulespb.RuleGroupList , err error ) {
0 commit comments