@@ -512,7 +512,7 @@ func tokenForGroup(g *rulespb.RuleGroupDesc) uint32 {
512
512
return ringHasher .Sum32 ()
513
513
}
514
514
515
- func (r * Ruler ) instanceOwnsRuleGroup (rr ring.ReadRing , g * rulespb.RuleGroupDesc , disabledRuleGroups validation.DisabledRuleGroups , instanceAddr string , forBackup bool ) (bool , error ) {
515
+ func (r * Ruler ) instanceOwnsRuleGroup (rr ring.ReadRing , g * rulespb.RuleGroupDesc , disabledRuleGroups validation.DisabledRuleGroups , forBackup bool ) (bool , error ) {
516
516
517
517
hash := tokenForGroup (g )
518
518
@@ -521,6 +521,7 @@ func (r *Ruler) instanceOwnsRuleGroup(rr ring.ReadRing, g *rulespb.RuleGroupDesc
521
521
return false , errors .Wrap (err , "error reading ring to verify rule group ownership" )
522
522
}
523
523
524
+ instanceAddr := r .lifecycler .GetInstanceAddr ()
524
525
if forBackup {
525
526
// Only the second up to the last replica are used as backup
526
527
for i := 1 ; i < len (rlrs .Instances ); i ++ {
@@ -536,7 +537,7 @@ func (r *Ruler) instanceOwnsRuleGroup(rr ring.ReadRing, g *rulespb.RuleGroupDesc
536
537
level .Debug (r .Logger ()).Log ("msg" , "primary taking ownership" , "user" , g .User , "group" , g .Name , "namespace" , g .Namespace , "ruler" , instanceAddr )
537
538
return ownsRuleGroupOrDisable (g , disabledRuleGroups )
538
539
}
539
- if ruler .Addr == instanceAddr && r .nonPrimaryInstanceOwnsRuleGroup (g , rlrs .GetAddresses ()[:i ], instanceAddr ) {
540
+ if ruler .Addr == instanceAddr && r .nonPrimaryInstanceOwnsRuleGroup (g , rlrs .GetAddresses ()[:i ]) {
540
541
level .Info (r .Logger ()).Log ("msg" , "non-primary ruler taking ownership" , "user" , g .User , "group" , g .Name , "namespace" , g .Namespace , "ruler" , instanceAddr )
541
542
return ownsRuleGroupOrDisable (g , disabledRuleGroups )
542
543
}
@@ -569,7 +570,7 @@ func (r *Ruler) LivenessCheck(_ context.Context, request *LivenessCheckRequest)
569
570
570
571
// nonPrimaryInstanceOwnsRuleGroup performs a liveness check against the provided replicas. If any one of the replicas responds with a state = Running, then
571
572
// this Ruler should not take ownership of the rule group. Otherwise, this Ruler must take ownership of the rule group to avoid missing evaluations.
572
- func (r * Ruler ) nonPrimaryInstanceOwnsRuleGroup (g * rulespb.RuleGroupDesc , replicas []string , selfAddress string ) bool {
573
+ func (r * Ruler ) nonPrimaryInstanceOwnsRuleGroup (g * rulespb.RuleGroupDesc , replicas []string ) bool {
573
574
userID := g .User
574
575
575
576
jobs := concurrency .CreateJobsFromStrings (replicas )
@@ -586,14 +587,12 @@ func (r *Ruler) nonPrimaryInstanceOwnsRuleGroup(g *rulespb.RuleGroupDesc, replic
586
587
rulerClient , err := r .GetClientFor (addr )
587
588
if err != nil {
588
589
errorChan <- err
589
- level .Debug (r .Logger ()).Log ("msg" , "unable to get client for ruler" , "ruler addr" , addr )
590
+ level .Error (r .Logger ()).Log ("msg" , "unable to get client for ruler" , "ruler addr" , addr )
590
591
return nil
591
592
}
592
- level .Debug (r .Logger ()).Log ("msg" , "performing liveness check against" , "addr" , addr , "for" , g .Name , "instance addr" , selfAddress )
593
+ level .Debug (r .Logger ()).Log ("msg" , "performing liveness check against" , "addr" , addr , "for" , g .Name )
593
594
594
- resp , err := rulerClient .LivenessCheck (ctx , & LivenessCheckRequest {
595
- RulerAddress : selfAddress ,
596
- })
595
+ resp , err := rulerClient .LivenessCheck (ctx , & LivenessCheckRequest {})
597
596
if err != nil {
598
597
errorChan <- err
599
598
level .Debug (r .Logger ()).Log ("msg" , "liveness check failed" , "addr" , addr , "for" , g .Name , "err" , err .Error ())
@@ -804,12 +803,12 @@ func (r *Ruler) listRulesShardingDefault(ctx context.Context) (map[string]rulesp
804
803
backedUpConfigs := make (map [string ]rulespb.RuleGroupList )
805
804
for userID , groups := range configs {
806
805
ruleGroupCounts [userID ] = len (groups )
807
- owned := r .filterRuleGroups (userID , groups , r .limits . DisabledRuleGroups ( userID ), r . ring , r . lifecycler . GetInstanceAddr (), r . logger , r . ringCheckErrors )
806
+ owned := r .filterRuleGroups (userID , groups , r .ring )
808
807
if len (owned ) > 0 {
809
808
ownedConfigs [userID ] = owned
810
809
}
811
810
if r .cfg .RulesBackupEnabled () {
812
- backup := r .filterBackupRuleGroups (userID , groups , owned , r .limits . DisabledRuleGroups ( userID ), r . ring , r . lifecycler . GetInstanceAddr (), r . logger , r . ringCheckErrors )
811
+ backup := r .filterBackupRuleGroups (userID , groups , owned , r .ring )
813
812
if len (backup ) > 0 {
814
813
backedUpConfigs [userID ] = backup
815
814
}
@@ -876,10 +875,10 @@ func (r *Ruler) listRulesShuffleSharding(ctx context.Context) (map[string]rulesp
876
875
ruleGroupCounts [userID ] = len (groups )
877
876
gLock .Unlock ()
878
877
879
- filterOwned := r .filterRuleGroups (userID , groups , r . limits . DisabledRuleGroups ( userID ), userRings [userID ], r . lifecycler . GetInstanceAddr (), r . logger , r . ringCheckErrors )
878
+ filterOwned := r .filterRuleGroups (userID , groups , userRings [userID ])
880
879
var filterBackup []* rulespb.RuleGroupDesc
881
880
if r .cfg .RulesBackupEnabled () {
882
- filterBackup = r .filterBackupRuleGroups (userID , groups , filterOwned , r . limits . DisabledRuleGroups ( userID ), userRings [userID ], r . lifecycler . GetInstanceAddr (), r . logger , r . ringCheckErrors )
881
+ filterBackup = r .filterBackupRuleGroups (userID , groups , filterOwned , userRings [userID ])
883
882
}
884
883
if len (filterOwned ) == 0 && len (filterBackup ) == 0 {
885
884
continue
@@ -906,28 +905,29 @@ func (r *Ruler) listRulesShuffleSharding(ctx context.Context) (map[string]rulesp
906
905
// This function only uses User, Namespace, and Name fields of individual RuleGroups.
907
906
//
908
907
// This method must not use r.ring, but only the ring passed as a parameter.
909
- func (r * Ruler ) filterRuleGroups (userID string , ruleGroups []* rulespb.RuleGroupDesc , disabledRuleGroups validation. DisabledRuleGroups , ring ring.ReadRing , instanceAddr string , log log. Logger , ringCheckErrors prometheus. Counter ) []* rulespb.RuleGroupDesc {
908
+ func (r * Ruler ) filterRuleGroups (userID string , ruleGroups []* rulespb.RuleGroupDesc , ring ring.ReadRing ) []* rulespb.RuleGroupDesc {
910
909
// Prune the rule group to only contain rules that this ruler is responsible for, based on ring.
911
910
var result []* rulespb.RuleGroupDesc
911
+
912
912
for _ , g := range ruleGroups {
913
- owned , err := r .instanceOwnsRuleGroup (ring , g , disabledRuleGroups , instanceAddr , false )
913
+ owned , err := r .instanceOwnsRuleGroup (ring , g , r . limits . DisabledRuleGroups ( userID ) , false )
914
914
if err != nil {
915
915
switch e := err .(type ) {
916
916
case * DisabledRuleGroupErr :
917
- level .Info (log ).Log ("msg" , e .Message )
917
+ level .Info (r . logger ).Log ("msg" , e .Message )
918
918
continue
919
919
default :
920
- ringCheckErrors .Inc ()
921
- level .Error (log ).Log ("msg" , "failed to check if the ruler replica owns the rule group" , "user" , userID , "namespace" , g .Namespace , "group" , g .Name , "err" , err )
920
+ r . ringCheckErrors .Inc ()
921
+ level .Error (r . logger ).Log ("msg" , "failed to check if the ruler replica owns the rule group" , "user" , userID , "namespace" , g .Namespace , "group" , g .Name , "err" , err )
922
922
continue
923
923
}
924
924
}
925
925
926
926
if owned {
927
- level .Debug (log ).Log ("msg" , "rule group owned" , "user" , g .User , "namespace" , g .Namespace , "name" , g .Name )
927
+ level .Debug (r . logger ).Log ("msg" , "rule group owned" , "user" , g .User , "namespace" , g .Namespace , "name" , g .Name )
928
928
result = append (result , g )
929
929
} else {
930
- level .Debug (log ).Log ("msg" , "rule group not owned, ignoring" , "user" , g .User , "namespace" , g .Namespace , "name" , g .Name )
930
+ level .Debug (r . logger ).Log ("msg" , "rule group not owned, ignoring" , "user" , g .User , "namespace" , g .Namespace , "name" , g .Name )
931
931
}
932
932
}
933
933
@@ -938,7 +938,7 @@ func (r *Ruler) filterRuleGroups(userID string, ruleGroups []*rulespb.RuleGroupD
938
938
// This function only uses User, Namespace, and Name fields of individual RuleGroups.
939
939
//
940
940
// This method must not use r.ring, but only the ring passed as a parameter.
941
- func (r * Ruler ) filterBackupRuleGroups (userID string , ruleGroups []* rulespb.RuleGroupDesc , owned []* rulespb.RuleGroupDesc , disabledRuleGroups validation. DisabledRuleGroups , ring ring.ReadRing , instanceAddr string , log log. Logger , ringCheckErrors prometheus. Counter ) []* rulespb.RuleGroupDesc {
941
+ func (r * Ruler ) filterBackupRuleGroups (userID string , ruleGroups []* rulespb.RuleGroupDesc , owned []* rulespb.RuleGroupDesc , ring ring.ReadRing ) []* rulespb.RuleGroupDesc {
942
942
var result []* rulespb.RuleGroupDesc
943
943
ownedMap := map [uint32 ]struct {}{}
944
944
for _ , g := range owned {
@@ -951,24 +951,24 @@ func (r *Ruler) filterBackupRuleGroups(userID string, ruleGroups []*rulespb.Rule
951
951
if _ , OK := ownedMap [hash ]; OK {
952
952
continue
953
953
}
954
- backup , err := r .instanceOwnsRuleGroup (ring , g , disabledRuleGroups , instanceAddr , true )
954
+ backup , err := r .instanceOwnsRuleGroup (ring , g , r . limits . DisabledRuleGroups ( userID ) , true )
955
955
if err != nil {
956
956
switch e := err .(type ) {
957
957
case * DisabledRuleGroupErr :
958
- level .Info (log ).Log ("msg" , e .Message )
958
+ level .Info (r . logger ).Log ("msg" , e .Message )
959
959
continue
960
960
default :
961
- ringCheckErrors .Inc ()
962
- level .Error (log ).Log ("msg" , "failed to check if the ruler replica backs up the rule group" , "user" , userID , "namespace" , g .Namespace , "group" , g .Name , "err" , err )
961
+ r . ringCheckErrors .Inc ()
962
+ level .Error (r . logger ).Log ("msg" , "failed to check if the ruler replica backs up the rule group" , "user" , userID , "namespace" , g .Namespace , "group" , g .Name , "err" , err )
963
963
continue
964
964
}
965
965
}
966
966
967
967
if backup {
968
- level .Debug (log ).Log ("msg" , "rule group backed up" , "user" , g .User , "namespace" , g .Namespace , "name" , g .Name )
968
+ level .Debug (r . logger ).Log ("msg" , "rule group backed up" , "user" , g .User , "namespace" , g .Namespace , "name" , g .Name )
969
969
result = append (result , g )
970
970
} else {
971
- level .Debug (log ).Log ("msg" , "rule group not backed up, ignoring" , "user" , g .User , "namespace" , g .Namespace , "name" , g .Name )
971
+ level .Debug (r . logger ).Log ("msg" , "rule group not backed up, ignoring" , "user" , g .User , "namespace" , g .Namespace , "name" , g .Name )
972
972
}
973
973
}
974
974
0 commit comments