Skip to content

Templates in use are being removed by alertmanager cleanup logic #4890

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 4, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@
* [BUGFIX] Ruler: Fix /ruler/rule_groups returns YAML with extra fields. #4767
* [BUGFIX] Respecting `-tracing.otel.sample-ratio` configuration when enabling OpenTelemetry tracing with X-ray. #4862
* [BUGFIX] QueryFrontend: fixed query_range requests when the query has `start` equal to `end`. #4877
* [BUGFIX] AlertManager: fixed issue introduced by #4495 where template files were being deleted when using alertmanager local store. #4890

## 1.13.0 2022-07-14

Expand Down
36 changes: 35 additions & 1 deletion pkg/alertmanager/alertstore/local/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ import (
)

const (
Name = "local"
Name = "local"
templatesDir = "templates"
)

var (
Expand Down Expand Up @@ -148,9 +149,42 @@ func (f *Store) reloadConfigs() (map[string]alertspb.AlertConfigDesc, error) {
// The file name must correspond to the user tenant ID
user := strings.TrimSuffix(info.Name(), ext)

// Load template files
userTemplateDir := filepath.Join(f.cfg.Path, user, templatesDir)
var templates []*alertspb.TemplateDesc

if _, e := os.Stat(userTemplateDir); e == nil {
err = filepath.Walk(userTemplateDir, func(templatePath string, info os.FileInfo, err error) error {
if err != nil {
return errors.Wrapf(err, "unable to walk file path at %s", templatePath)
}
// Ignore files that are directories
if info.IsDir() {
return nil
}
content, err := os.ReadFile(templatePath)
if err != nil {
return errors.Wrapf(err, "unable to read alertmanager templates %s", templatePath)
}

templates = append(templates, &alertspb.TemplateDesc{
Body: string(content),
Filename: info.Name(),
})
return nil
})

if err != nil {
return errors.Wrapf(err, "unable to list alertmanager templates: %s", userTemplateDir)
}
} else if !os.IsNotExist(e) {
return errors.Wrapf(e, "unable to read alertmanager templates %s", path)
}

configs[user] = alertspb.AlertConfigDesc{
User: user,
RawConfig: string(content),
Templates: templates,
}
return nil
})
Expand Down
19 changes: 17 additions & 2 deletions pkg/alertmanager/alertstore/local/store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -81,17 +81,22 @@ func TestStore_GetAlertConfigs(t *testing.T) {
// The storage contains some configs.
{
user1Cfg := prepareAlertmanagerConfig("user-1")
require.NoError(t, os.WriteFile(filepath.Join(storeDir, "user-1.yaml"), []byte(user1Cfg), os.ModePerm))
user1Dir, user1TemplateDir := prepareUserDir(t, storeDir, true, "user-1")
require.NoError(t, os.WriteFile(filepath.Join(user1Dir, "user-1.yaml"), []byte(user1Cfg), os.ModePerm))

require.NoError(t, os.WriteFile(filepath.Join(user1TemplateDir, "template.tpl"), []byte("testTemplate"), os.ModePerm))

configs, err := store.GetAlertConfigs(ctx, []string{"user-1", "user-2"})
require.NoError(t, err)
assert.Contains(t, configs, "user-1")
assert.NotContains(t, configs, "user-2")
assert.Equal(t, user1Cfg, configs["user-1"].RawConfig)
assert.Equal(t, "testTemplate", configs["user-1"].Templates[0].Body)

// Add another user config.
user2Cfg := prepareAlertmanagerConfig("user-2")
require.NoError(t, os.WriteFile(filepath.Join(storeDir, "user-2.yaml"), []byte(user2Cfg), os.ModePerm))
user2Dir, _ := prepareUserDir(t, storeDir, false, "user-2")
require.NoError(t, os.WriteFile(filepath.Join(user2Dir, "user-2.yaml"), []byte(user2Cfg), os.ModePerm))

configs, err = store.GetAlertConfigs(ctx, []string{"user-1", "user-2"})
require.NoError(t, err)
Expand All @@ -102,6 +107,16 @@ func TestStore_GetAlertConfigs(t *testing.T) {
}
}

func prepareUserDir(t *testing.T, storeDir string, createTemplateDir bool, user string) (userDir string, templateDir string) {
userDir = filepath.Join(storeDir, user)
templateDir = filepath.Join(userDir, templatesDir)
require.NoError(t, os.MkdirAll(userDir, os.ModePerm))
if createTemplateDir {
require.NoError(t, os.MkdirAll(templateDir, os.ModePerm))
}
return
}

func prepareLocalStore(t *testing.T) (store *Store, storeDir string) {
var err error

Expand Down
66 changes: 66 additions & 0 deletions pkg/alertmanager/multitenant_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"os"
"path/filepath"
"regexp"
"sort"
"strings"
"sync"
"testing"
Expand All @@ -40,6 +41,7 @@ import (
"github.com/cortexproject/cortex/pkg/alertmanager/alertspb"
"github.com/cortexproject/cortex/pkg/alertmanager/alertstore"
"github.com/cortexproject/cortex/pkg/alertmanager/alertstore/bucketclient"
"github.com/cortexproject/cortex/pkg/alertmanager/alertstore/local"
"github.com/cortexproject/cortex/pkg/ring"
"github.com/cortexproject/cortex/pkg/ring/kv/consul"
"github.com/cortexproject/cortex/pkg/storage/bucket"
Expand Down Expand Up @@ -159,6 +161,49 @@ func TestMultitenantAlertmanagerConfig_Validate(t *testing.T) {
}
}

// TestMultitenantAlertmanager_loadAndSyncConfigsLocalStorage checks that repeatedly
// syncing configs served by the local alert store leaves every config and template
// file in the store directory in place.
func TestMultitenantAlertmanager_loadAndSyncConfigsLocalStorage(t *testing.T) {
	storeDir := t.TempDir()
	// Fail immediately if the store cannot be built instead of discarding the error.
	store, err := local.NewStore(local.StoreConfig{Path: storeDir})
	require.NoError(t, err)
	config := `global:
resolve_timeout: 1m
smtp_require_tls: false

route:
receiver: 'email'

receivers:
- name: 'email'
email_configs:
- to: [email protected]
from: [email protected]
smarthost: smtp:2525
`
	user1Dir, user1TemplateDir := prepareUserDir(t, storeDir, "user-1")
	user2Dir, _ := prepareUserDir(t, storeDir, "user-2")
	require.NoError(t, os.WriteFile(filepath.Join(user1Dir, "user-1.yaml"), []byte(config), os.ModePerm))
	require.NoError(t, os.WriteFile(filepath.Join(user2Dir, "user-2.yaml"), []byte(config), os.ModePerm))
	require.NoError(t, os.WriteFile(filepath.Join(user1TemplateDir, "template.tpl"), []byte("testTemplate"), os.ModePerm))

	// Snapshot of the store contents: two configs plus one template.
	originalFiles, err := listFiles(storeDir)
	require.NoError(t, err)
	require.Len(t, originalFiles, 3)

	cfg := mockAlertmanagerConfig(t)
	// The alertmanager data dir is pointed at the store dir, so any file removed
	// from the data dir during a sync shows up as a missing store file below.
	cfg.DataDir = storeDir
	reg := prometheus.NewPedanticRegistry()
	am, err := createMultitenantAlertmanager(cfg, nil, nil, store, nil, nil, log.NewNopLogger(), reg)
	require.NoError(t, err)

	// Sync several times; the set of files must be identical after every pass.
	for i := 0; i < 5; i++ {
		err = am.loadAndSyncConfigs(context.Background(), reasonPeriodic)
		require.NoError(t, err)
		require.Len(t, am.alertmanagers, 2)
		files, err := listFiles(storeDir)
		require.NoError(t, err)
		// Verify that no file was deleted by the sync.
		require.Equal(t, originalFiles, files)
	}
}

func TestMultitenantAlertmanager_loadAndSyncConfigs(t *testing.T) {
ctx := context.Background()

Expand Down Expand Up @@ -1885,6 +1930,27 @@ func prepareInMemoryAlertStore() alertstore.AlertStore {
return bucketclient.NewBucketAlertStore(objstore.NewInMemBucket(), nil, log.NewNopLogger())
}

func prepareUserDir(t *testing.T, storeDir string, user string) (userDir string, templateDir string) {
userDir = filepath.Join(storeDir, user)
templateDir = filepath.Join(userDir, templatesDir)
require.NoError(t, os.MkdirAll(userDir, os.ModePerm))
require.NoError(t, os.MkdirAll(templateDir, os.ModePerm))
return
}

// listFiles returns the sorted paths of every non-directory entry found while
// walking dir.
//
// An error reported by the walk (for example a missing or unreadable dir) is
// returned to the caller together with a nil slice.
func listFiles(dir string) ([]string, error) {
	var files []string
	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		// info is nil when err is non-nil, so the error must be checked
		// before info is touched.
		if err != nil {
			return err
		}
		if !info.IsDir() {
			files = append(files, path)
		}
		return nil
	})
	if err != nil {
		return nil, err
	}
	sort.Strings(files)

	return files, nil
}

func TestSafeTemplateFilepath(t *testing.T) {
tests := map[string]struct {
dir string
Expand Down