@@ -1093,6 +1093,148 @@ func TestRulerDisablesRuleGroups(t *testing.T) {
1093
1093
})
1094
1094
}
1095
1095
1096
+ func TestRulerHA (t * testing.T ) {
1097
+ const numRulesGroups = 20
1098
+
1099
+ random := rand .New (rand .NewSource (time .Now ().UnixNano ()))
1100
+ s , err := e2e .NewScenario (networkName )
1101
+ require .NoError (t , err )
1102
+ defer s .Close ()
1103
+
1104
+ // Generate multiple rule groups, with 1 rule each.
1105
+ ruleGroups := make ([]rulefmt.RuleGroup , numRulesGroups )
1106
+ expectedNames := make ([]string , numRulesGroups )
1107
+ alertCount := 0
1108
+ evalInterval , _ := model .ParseDuration ("5s" )
1109
+ for i := 0 ; i < numRulesGroups ; i ++ {
1110
+ num := random .Intn (10 )
1111
+ var ruleNode yaml.Node
1112
+ var exprNode yaml.Node
1113
+
1114
+ ruleNode .SetString (fmt .Sprintf ("rule_%d" , i ))
1115
+ exprNode .SetString (strconv .Itoa (i ))
1116
+ ruleName := fmt .Sprintf ("test_%d" , i )
1117
+
1118
+ expectedNames [i ] = ruleName
1119
+
1120
+ if num % 2 == 0 {
1121
+ alertCount ++
1122
+ ruleGroups [i ] = rulefmt.RuleGroup {
1123
+ Name : ruleName ,
1124
+ Interval : evalInterval ,
1125
+ Rules : []rulefmt.RuleNode {{
1126
+ Alert : ruleNode ,
1127
+ Expr : exprNode ,
1128
+ }},
1129
+ }
1130
+ } else {
1131
+ ruleGroups [i ] = rulefmt.RuleGroup {
1132
+ Name : ruleName ,
1133
+ Interval : evalInterval ,
1134
+ Rules : []rulefmt.RuleNode {{
1135
+ Record : ruleNode ,
1136
+ Expr : exprNode ,
1137
+ }},
1138
+ }
1139
+ }
1140
+ }
1141
+
1142
+ // Start dependencies.
1143
+ consul := e2edb .NewConsul ()
1144
+ minio := e2edb .NewMinio (9000 , rulestoreBucketName )
1145
+ require .NoError (t , s .StartAndWaitReady (consul , minio ))
1146
+
1147
+ // Configure the ruler.
1148
+ overrides := map [string ]string {
1149
+ // Since we're not going to run any rule, we don't need the
1150
+ // store-gateway to be configured to a valid address.
1151
+ "-querier.store-gateway-addresses" : "localhost:12345" ,
1152
+ // Enable the bucket index so we can skip the initial bucket scan.
1153
+ "-blocks-storage.bucket-store.bucket-index.enabled" : "true" ,
1154
+ "-ruler.ring.replication-factor" : "2" ,
1155
+ "-ruler.enable-ha" : "true" ,
1156
+ "-ruler.poll-interval" : "5s" ,
1157
+ "-ruler.list-rules-fanout-timeout" : "2s" ,
1158
+ "-ruler.liveness-check-timeout" : "50ms" ,
1159
+ }
1160
+
1161
+ rulerFlags := mergeFlags (
1162
+ BlocksStorageFlags (),
1163
+ RulerFlags (),
1164
+ RulerShardingFlags (consul .NetworkHTTPEndpoint ()),
1165
+ overrides ,
1166
+ )
1167
+
1168
+ // Start rulers.
1169
+ ruler1 := e2ecortex .NewRuler ("ruler-1" , consul .NetworkHTTPEndpoint (), rulerFlags , "" )
1170
+ ruler2 := e2ecortex .NewRuler ("ruler-2" , consul .NetworkHTTPEndpoint (), rulerFlags , "" )
1171
+ ruler3 := e2ecortex .NewRuler ("ruler-3" , consul .NetworkHTTPEndpoint (), rulerFlags , "" )
1172
+ rulers := e2ecortex .NewCompositeCortexService (ruler1 , ruler2 , ruler3 )
1173
+ require .NoError (t , s .StartAndWaitReady (ruler1 , ruler2 , ruler3 ))
1174
+
1175
+ // Upload rule groups to one of the rulers.
1176
+ c , err := e2ecortex .NewClient ("" , "" , "" , ruler1 .HTTPEndpoint (), "user-1" )
1177
+ require .NoError (t , err )
1178
+ namespaceNames := []string {"test1" , "test2" , "test3" , "test4" , "test5" }
1179
+ namespaceNameCount := make ([]int , 5 )
1180
+ nsRand := rand .New (rand .NewSource (time .Now ().UnixNano ()))
1181
+ for _ , ruleGroup := range ruleGroups {
1182
+ index := nsRand .Intn (len (namespaceNames ))
1183
+ namespaceNameCount [index ] = namespaceNameCount [index ] + 1
1184
+ require .NoError (t , c .SetRuleGroup (ruleGroup , namespaceNames [index ]))
1185
+ }
1186
+
1187
+ // Wait until rulers have loaded all rules.
1188
+ require .NoError (t , rulers .WaitSumMetricsWithOptions (e2e .Equals (numRulesGroups ), []string {"cortex_prometheus_rule_group_rules" }, e2e .WaitMissingMetrics ))
1189
+
1190
+ ruler1SyncTotal , err := ruler1 .SumMetrics ([]string {"cortex_ruler_sync_rules_total" })
1191
+ require .NoError (t , err )
1192
+ ruler3SyncTotal , err := ruler3 .SumMetrics ([]string {"cortex_ruler_sync_rules_total" })
1193
+ require .NoError (t , err )
1194
+
1195
+ err = consul .Kill () // kill consul so the rulers will operate with the tokens/instances they already have
1196
+ require .NoError (t , err )
1197
+
1198
+ err = ruler2 .Kill ()
1199
+ require .NoError (t , err )
1200
+
1201
+ // wait for another sync
1202
+ require .NoError (t , ruler1 .WaitSumMetrics (e2e .Greater (ruler1SyncTotal [0 ]), "cortex_ruler_sync_rules_total" ))
1203
+ require .NoError (t , ruler3 .WaitSumMetrics (e2e .Greater (ruler3SyncTotal [0 ]), "cortex_ruler_sync_rules_total" ))
1204
+
1205
+ rulers = e2ecortex .NewCompositeCortexService (ruler1 , ruler3 )
1206
+ require .NoError (t , rulers .WaitSumMetricsWithOptions (e2e .Equals (numRulesGroups ), []string {"cortex_prometheus_rule_group_rules" }, e2e .WaitMissingMetrics ))
1207
+
1208
+ t .Log (ruler1 .SumMetrics ([]string {"cortex_prometheus_rule_group_rules" }))
1209
+ t .Log (ruler3 .SumMetrics ([]string {"cortex_prometheus_rule_group_rules" }))
1210
+
1211
+ c3 , err := e2ecortex .NewClient ("" , "" , "" , ruler3 .HTTPEndpoint (), "user-1" )
1212
+ require .NoError (t , err )
1213
+
1214
+ ruler1Rules , err := c .GetRuleGroups ()
1215
+ require .NoError (t , err )
1216
+
1217
+ ruler3Rules , err := c3 .GetRuleGroups ()
1218
+ require .NoError (t , err )
1219
+
1220
+ ruleCount := 0
1221
+ countFunc := func (ruleGroups map [string ][]rulefmt.RuleGroup ) {
1222
+ for _ , v := range ruleGroups {
1223
+ ruleCount += len (v )
1224
+ }
1225
+ }
1226
+
1227
+ countFunc (ruler1Rules )
1228
+ require .Equal (t , numRulesGroups , ruleCount )
1229
+ ruleCount = 0
1230
+ countFunc (ruler3Rules )
1231
+ require .Equal (t , numRulesGroups , ruleCount )
1232
+
1233
+ results , err := c .GetPrometheusRules (e2ecortex.RuleFilter {})
1234
+ require .NoError (t , err )
1235
+ require .Equal (t , numRulesGroups , len (results ))
1236
+ }
1237
+
1096
1238
func TestRulerKeepFiring (t * testing.T ) {
1097
1239
s , err := e2e .NewScenario (networkName )
1098
1240
require .NoError (t , err )
0 commit comments