diff --git a/README.md b/README.md index 01bd8b30d..0716f990b 100644 --- a/README.md +++ b/README.md @@ -90,6 +90,16 @@ auth_modules: sslmode: disable ``` +### timeouts +This section allows configuring a default and per-collector timeouts to help metrics get scraped even if a query runs or blocks longer than your configured Prometheus scrape timeout. If a timeout is unset or set to 0 (or `0s`, `0m`, etc.) the timeout is disabled. + +```yaml +timeouts: + default: 2s + collectors: + stat_user_tables: 5s +``` + ## Building and running git clone https://github.com/prometheus-community/postgres_exporter.git diff --git a/cmd/postgres_exporter/probe.go b/cmd/postgres_exporter/probe.go index 2c8c7652e..b754a40a5 100644 --- a/cmd/postgres_exporter/probe.go +++ b/cmd/postgres_exporter/probe.go @@ -82,7 +82,7 @@ func handleProbe(logger *slog.Logger, excludeDatabases []string) http.HandlerFun registry.MustRegister(exporter) // Run the probe - pc, err := collector.NewProbeCollector(tl, excludeDatabases, registry, dsn) + pc, err := collector.NewProbeCollector(tl, excludeDatabases, registry, dsn, conf.Timeouts) if err != nil { logger.Error("Error creating probe collector", "err", err) http.Error(w, err.Error(), http.StatusInternalServerError) diff --git a/collector/probe.go b/collector/probe.go index e40d6fee1..d30674087 100644 --- a/collector/probe.go +++ b/collector/probe.go @@ -27,9 +27,10 @@ type ProbeCollector struct { collectors map[string]Collector logger *slog.Logger instance *instance + timeouts config.Timeouts } -func NewProbeCollector(logger *slog.Logger, excludeDatabases []string, registry *prometheus.Registry, dsn config.DSN) (*ProbeCollector, error) { +func NewProbeCollector(logger *slog.Logger, excludeDatabases []string, registry *prometheus.Registry, dsn config.DSN, timeouts config.Timeouts) (*ProbeCollector, error) { collectors := make(map[string]Collector) initiatedCollectorsMtx.Lock() defer initiatedCollectorsMtx.Unlock() @@ -57,6 +58,12 @@ func 
// Timeouts holds the timeout settings read from the "timeouts" section of
// the configuration: one default value plus optional per-collector values.
type Timeouts struct {
	// Default is the timeout used for any collector that has no entry in
	// Collectors. A zero or negative value means no timeout.
	Default time.Duration `yaml:"default"`

	// Collectors maps a collector name to its own timeout. An entry here
	// takes precedence over Default, including an explicit zero, which
	// disables the timeout for that collector even when Default is set.
	Collectors map[string]time.Duration `yaml:"collectors"`
}

// Context derives a child of parent for a single run of the named collector.
//
// The timeout is taken from t.Collectors[collector] when such an entry
// exists, otherwise from t.Default. When the selected timeout is positive
// the returned context carries that timeout; otherwise it is only
// cancellable and has no deadline.
//
// The returned CancelFunc must always be called by the caller to release the
// resources associated with the context.
func (t Timeouts) Context(parent context.Context, collector string) (context.Context, context.CancelFunc) {
	// Indexing a nil map is safe in Go, so a zero-value Timeouts works here.
	timeout, ok := t.Collectors[collector]
	if !ok {
		timeout = t.Default
	}
	// Treat every non-positive duration as "disabled". Passing a negative
	// value to context.WithTimeout would yield a context whose deadline has
	// already passed, so the collector would fail before doing any work.
	if timeout <= 0 {
		return context.WithCancel(parent)
	}
	return context.WithTimeout(parent, timeout)
}