From 72a731a9d7c6202d526498f26278ca414801aac4 Mon Sep 17 00:00:00 2001 From: Yijie Qin Date: Thu, 13 Jul 2023 18:55:31 -0400 Subject: [PATCH 1/3] add alert name in error log Signed-off-by: Yijie Qin --- pkg/alertmanager/alertmanager.go | 4 ++-- pkg/alertmanager/alertmanager_test.go | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/alertmanager/alertmanager.go b/pkg/alertmanager/alertmanager.go index 6ac39ddfbb6..d5a2aa616b9 100644 --- a/pkg/alertmanager/alertmanager.go +++ b/pkg/alertmanager/alertmanager.go @@ -604,7 +604,7 @@ func (g *dispatcherLimits) MaxNumberOfAggregationGroups() int { } var ( - errTooManyAlerts = "too many alerts, limit: %d" + errTooManyAlerts = "too many alerts, limit: %d, alert name: %s" errAlertsTooBig = "alerts too big, total size limit: %d bytes" ) @@ -670,7 +670,7 @@ func (a *alertsLimiter) PreStore(alert *types.Alert, existing bool) error { if !existing && countLimit > 0 && (a.count+1) > countLimit { a.failureCounter.Inc() - return fmt.Errorf(errTooManyAlerts, countLimit) + return fmt.Errorf(errTooManyAlerts, countLimit, alert.Name()) } if existing { diff --git a/pkg/alertmanager/alertmanager_test.go b/pkg/alertmanager/alertmanager_test.go index 23a309f47eb..52aa186c391 100644 --- a/pkg/alertmanager/alertmanager_test.go +++ b/pkg/alertmanager/alertmanager_test.go @@ -114,7 +114,7 @@ route: var ( alert1 = model.Alert{ - Labels: model.LabelSet{"alert": "first"}, + Labels: model.LabelSet{"alert": "first", "alertname": "alert1"}, Annotations: model.LabelSet{"job": "test"}, StartsAt: time.Now(), EndsAt: time.Now(), @@ -123,7 +123,7 @@ var ( alert1Size = alertSize(alert1) alert2 = model.Alert{ - Labels: model.LabelSet{"alert": "second"}, + Labels: model.LabelSet{"alert": "second", "alertname": "alert2"}, Annotations: model.LabelSet{"job": "test", "cluster": "prod"}, StartsAt: time.Now(), EndsAt: time.Now(), @@ -161,7 +161,7 @@ func TestAlertsLimiterWithCountLimit(t *testing.T) { ops := []callbackOp{ {alert: 
&types.Alert{Alert: alert1}, existing: false, expectedCount: 1, expectedTotalSize: alert1Size}, - {alert: &types.Alert{Alert: alert2}, existing: false, expectedInsertError: fmt.Errorf(errTooManyAlerts, 1), expectedCount: 1, expectedTotalSize: alert1Size}, + {alert: &types.Alert{Alert: alert2}, existing: false, expectedInsertError: fmt.Errorf(errTooManyAlerts, 1, alert2.Name()), expectedCount: 1, expectedTotalSize: alert1Size}, {alert: &types.Alert{Alert: alert1}, delete: true, expectedCount: 0, expectedTotalSize: 0}, {alert: &types.Alert{Alert: alert2}, existing: false, expectedCount: 1, expectedTotalSize: alert2Size}, From e7aa4d19e8e53b6998a4ef32d812ee9301c14252 Mon Sep 17 00:00:00 2001 From: Yijie Qin Date: Fri, 14 Jul 2023 12:06:11 -0400 Subject: [PATCH 2/3] add changelog Signed-off-by: Yijie Qin --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87b3153f2ee..81beaa5e2d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ * [BUGFIX] Store Gateway: Fix bug in store gateway ring comparison logic. #5426 * [BUGFIX] Ring: Fix bug in consistency of Get func in a scaling zone-aware ring. #5429 * [BUGFIX] Query Frontend: Fix bug of failing to cancel downstream request context in query frontend v2 mode (query scheduler enabled). #5447 +* [ENHANCEMENT] Alertmanager: Add the alert name in error log when it get throttled. #5456 ## 1.15.1 2023-04-26 From 456b6aa2f2c27fb8c1a89f540d21f2515ee415de Mon Sep 17 00:00:00 2001 From: Yijie Qin Date: Fri, 14 Jul 2023 12:23:55 -0400 Subject: [PATCH 3/3] address comment Signed-off-by: Yijie Qin --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 81beaa5e2d4..fbe7d9be528 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ * [ENHANCEMENT] Distributor/Ingester: Add experimental `-distributor.sign_write_requests` flag to sign the write requests. 
#5430 * [ENHANCEMENT] Store Gateway/Querier/Compactor: Handling CMK Access Denied errors. #5420 #5442 #5446 * [ENHANCEMENT] Store Gateway: Implementing multi level index cache. #5451 +* [ENHANCEMENT] Alertmanager: Add the alert name in error log when it gets throttled. #5456 * [BUGFIX] Ruler: Validate if rule group can be safely converted back to rule group yaml from protobuf message #5265 * [BUGFIX] Querier: Convert gRPC `ResourceExhausted` status code from store gateway to 422 limit error. #5286 * [BUGFIX] Alertmanager: Route web-ui requests to the alertmanager distributor when sharding is enabled. #5293 @@ -52,7 +53,6 @@ * [BUGFIX] Store Gateway: Fix bug in store gateway ring comparison logic. #5426 * [BUGFIX] Ring: Fix bug in consistency of Get func in a scaling zone-aware ring. #5429 * [BUGFIX] Query Frontend: Fix bug of failing to cancel downstream request context in query frontend v2 mode (query scheduler enabled). #5447 -* [ENHANCEMENT] Alertmanager: Add the alert name in error log when it get throttled. #5456 ## 1.15.1 2023-04-26