Skip to content

Commit a87c25f

Browse files
authored
Fixes the registration of the Alertmanager alert receiving API metrics (#3065)
* Remove TODO about prometheus/alertmanager#2182 as it got merged
  Signed-off-by: gotjosh <[email protected]>
* Remove TODO about prometheus/alertmanager#2200 as it got merged
  Signed-off-by: gotjosh <[email protected]>
* Register the Alertmanager API metrics
  Signed-off-by: gotjosh <[email protected]>
* Add a changelog entry
  Signed-off-by: gotjosh <[email protected]>
* Fix tests
  Signed-off-by: gotjosh <[email protected]>
1 parent 2cb22f8 commit a87c25f

File tree

5 files changed

+43
-13
lines changed

5 files changed

+43
-13
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
* [BUGFIX] Experimental blocks storage: Ingester is less likely to hit gRPC message size limit when streaming data to queriers. #3015
2828
* [BUGFIX] Fix configuration for TLS server validation, TLS skip verify was hardcoded to true for all TLS configurations and prevented validation of server certificates. #3030
2929
* [BUGFIX] Fixes the Alertmanager panicking when no `-alertmanager.web.external-url` is provided. #3017
30+
* [BUGFIX] Fixes the registration of the Alertmanager API metrics `cortex_alertmanager_alerts_received_total` and `cortex_alertmanager_alerts_invalid_total`. #3065
3031

3132
## 1.3.0 / 2020-08-21
3233

integration/alertmanager_test.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"context"
77
"testing"
88

9+
"github.com/prometheus/common/model"
910
"github.com/prometheus/prometheus/pkg/labels"
1011
"github.com/stretchr/testify/require"
1112

@@ -95,6 +96,13 @@ func TestAlertmanagerStoreAPI(t *testing.T) {
9596
require.Len(t, cfg.Receivers, 1)
9697
require.Equal(t, "example_receiver", cfg.Receivers[0].Name)
9798

99+
err = c.SendAlertToAlermanager(context.Background(), &model.Alert{Labels: model.LabelSet{"foo": "bar"}})
100+
require.NoError(t, err)
101+
102+
require.NoError(t, am.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_alertmanager_alerts_received_total"},
103+
e2e.WithLabelMatchers(labels.MustNewMatcher(labels.MatchEqual, "user", "user-1")),
104+
e2e.WaitMissingMetrics))
105+
98106
err = c.DeleteAlertmanagerConfig(context.Background())
99107
require.NoError(t, err)
100108

integration/e2ecortex/client.go

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/gogo/protobuf/proto"
1616
"github.com/golang/snappy"
1717
alertConfig "github.com/prometheus/alertmanager/config"
18+
"github.com/prometheus/alertmanager/types"
1819
promapi "github.com/prometheus/client_golang/api"
1920
promv1 "github.com/prometheus/client_golang/api/prometheus/v1"
2021
"github.com/prometheus/common/model"
@@ -380,6 +381,32 @@ func (c *Client) DeleteAlertmanagerConfig(ctx context.Context) error {
380381
return nil
381382
}
382383

384+
// SendAlertToAlermanager sends a single alert to the Alertmanager API.
//
// The alert is wrapped in a one-element JSON array of types.Alert and POSTed
// to /api/prom/api/v1/alerts through the Alertmanager client. It returns an
// error if marshaling the alert, building the request or performing the HTTP
// call fails, or if the response status is anything other than 200 OK (in
// which case the status code and response body are included in the error).
//
// NOTE(review): the method name is spelled "Alermanager" (missing "t");
// renaming it would require updating its callers as well.
385+
func (c *Client) SendAlertToAlermanager(ctx context.Context, alert *model.Alert) error {
386+
u := c.alertmanagerClient.URL("/api/prom/api/v1/alerts", nil)
387+
388+
data, err := json.Marshal([]types.Alert{{Alert: *alert}})
389+
if err != nil {
390+
return fmt.Errorf("error marshaling the alert: %v", err)
391+
}
392+
393+
req, err := http.NewRequest(http.MethodPost, u.String(), bytes.NewReader(data))
394+
if err != nil {
395+
return fmt.Errorf("error creating request: %v", err)
396+
}
397+
398+
resp, body, err := c.alertmanagerClient.Do(ctx, req)
399+
if err != nil {
400+
return err
401+
}
402+
403+
if resp.StatusCode != http.StatusOK {
404+
return fmt.Errorf("sending alert failed with status %d and error %v", resp.StatusCode, string(body))
405+
}
406+
407+
return nil
408+
}
409+
383410
func (c *Client) PostRequest(url string, body io.Reader) (*http.Response, error) {
384411
req, err := http.NewRequest("POST", url, body)
385412
if err != nil {

pkg/alertmanager/alertmanager.go

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -67,20 +67,16 @@ type Alertmanager struct {
6767
mux *http.ServeMux
6868
registry *prometheus.Registry
6969

70+
// The Dispatcher is the only component we need to recreate when we call ApplyConfig.
71+
// Given its metrics don't have any variable labels we need to re-use the same metrics.
72+
dispatcherMetrics *dispatch.DispatcherMetrics
73+
7074
activeMtx sync.Mutex
7175
active bool
7276
}
7377

7478
var (
7579
webReload = make(chan chan error)
76-
77-
// In order to workaround a bug in the alertmanager, which doesn't register the
78-
// metrics in the input registry but to the global default one, we do define a
79-
// singleton dispatcher metrics instance that is going to be shared across all
80-
// tenants alertmanagers.
81-
// TODO change this once the vendored alertmanager will have this PR merged into:
82-
// https://github.com/prometheus/alertmanager/pull/2200
83-
dispatcherMetrics = dispatch.NewDispatcherMetrics(prometheus.NewRegistry())
8480
)
8581

8682
func init() {
@@ -158,6 +154,7 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
158154
Silences: am.silences,
159155
StatusFunc: am.marker.Status,
160156
Peer: cfg.Peer,
157+
Registry: am.registry,
161158
Logger: log.With(am.logger, "component", "api"),
162159
GroupFunc: func(f1 func(*dispatch.Route) bool, f2 func(*types.Alert, time.Time) bool) (dispatch.AlertGroups, map[model.Fingerprint][]string) {
163160
return am.dispatcher.Groups(f1, f2)
@@ -172,6 +169,7 @@ func New(cfg *Config, reg *prometheus.Registry) (*Alertmanager, error) {
172169
ui.Register(router, webReload, log.With(am.logger, "component", "ui"))
173170
am.mux = am.api.Register(router, am.cfg.ExternalURL.Path)
174171

172+
am.dispatcherMetrics = dispatch.NewDispatcherMetrics(am.registry)
175173
return am, nil
176174
}
177175

@@ -240,7 +238,7 @@ func (am *Alertmanager) ApplyConfig(userID string, conf *config.Config) error {
240238
am.marker,
241239
timeoutFunc,
242240
log.With(am.logger, "component", "dispatcher"),
243-
dispatcherMetrics,
241+
am.dispatcherMetrics,
244242
)
245243

246244
go am.dispatcher.Run()

pkg/alertmanager/multitenant_test.go

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
// +build !race
2-
31
package alertmanager
42

53
import (
@@ -57,8 +55,6 @@ func (m *mockAlertStore) DeleteAlertConfig(ctx context.Context, user string) err
5755
return fmt.Errorf("not implemented")
5856
}
5957

60-
// TestLoadAllConfigs ensures the multitenant alertmanager can properly load configs from a local backend store.
61-
// It is excluded from the race detector due to a vendored race issue https://github.com/prometheus/alertmanager/issues/2182
6258
func TestLoadAllConfigs(t *testing.T) {
6359
mockStore := &mockAlertStore{
6460
configs: map[string]alerts.AlertConfigDesc{

0 commit comments

Comments
 (0)