Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,16 @@ auth_modules:
sslmode: disable
```

### timeouts
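This section configures a default timeout and optional per-collector timeouts, so that metrics still get scraped even if a query runs or blocks for longer than your configured Prometheus scrape timeout. A collector with its own entry under `collectors` uses that value; every other collector uses `default`. If the resulting timeout is unset or set to 0 (or `0s`, `0m`, etc.), the timeout is disabled.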

```yaml
timeouts:
  default: 2s
  collectors:
    stat_user_tables: 5s
```

## Building and running

git clone https://github.com/prometheus-community/postgres_exporter.git
Expand Down
2 changes: 1 addition & 1 deletion cmd/postgres_exporter/probe.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func handleProbe(logger *slog.Logger, excludeDatabases []string) http.HandlerFun
registry.MustRegister(exporter)

// Run the probe
pc, err := collector.NewProbeCollector(tl, excludeDatabases, registry, dsn)
pc, err := collector.NewProbeCollector(tl, excludeDatabases, registry, dsn, conf.Timeouts)
if err != nil {
logger.Error("Error creating probe collector", "err", err)
http.Error(w, err.Error(), http.StatusInternalServerError)
Expand Down
16 changes: 13 additions & 3 deletions collector/probe.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,10 @@ type ProbeCollector struct {
collectors map[string]Collector
logger *slog.Logger
instance *instance
timeouts config.Timeouts
}

func NewProbeCollector(logger *slog.Logger, excludeDatabases []string, registry *prometheus.Registry, dsn config.DSN) (*ProbeCollector, error) {
func NewProbeCollector(logger *slog.Logger, excludeDatabases []string, registry *prometheus.Registry, dsn config.DSN, timeouts config.Timeouts) (*ProbeCollector, error) {
collectors := make(map[string]Collector)
initiatedCollectorsMtx.Lock()
defer initiatedCollectorsMtx.Unlock()
Expand Down Expand Up @@ -57,6 +58,12 @@ func NewProbeCollector(logger *slog.Logger, excludeDatabases []string, registry
}
}

for name := range timeouts.Collectors {
if _, ok := collectors[name]; !ok {
logger.Warn("timeout set for non-enabled collector", "collector", name)
}
}

instance, err := newInstance(dsn.GetConnectionString())
if err != nil {
return nil, err
Expand All @@ -67,6 +74,7 @@ func NewProbeCollector(logger *slog.Logger, excludeDatabases []string, registry
collectors: collectors,
logger: logger,
instance: instance,
timeouts: timeouts,
}, nil
}

Expand All @@ -86,8 +94,10 @@ func (pc *ProbeCollector) Collect(ch chan<- prometheus.Metric) {
wg.Add(len(pc.collectors))
for name, c := range pc.collectors {
go func(name string, c Collector) {
execute(context.TODO(), name, c, pc.instance, ch, pc.logger)
wg.Done()
ctx, cancel := pc.timeouts.Context(context.TODO(), name)
defer cancel()
defer wg.Done()
execute(ctx, name, c, pc.instance, ch, pc.logger)
}(name, c)
}
wg.Wait()
Expand Down
20 changes: 20 additions & 0 deletions config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,12 @@
package config

import (
"context"
"fmt"
"log/slog"
"os"
"sync"
"time"

"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
Expand All @@ -40,6 +42,8 @@ var (

// Config is the top-level structure the exporter's YAML configuration is
// decoded into.
type Config struct {
// AuthModules is read from the "auth_modules" key and holds the configured
// AuthModule entries, keyed by a string (presumably the module name).
AuthModules map[string]AuthModule `yaml:"auth_modules"`

// Timeouts is read from the "timeouts" key and holds the default and
// per-collector timeouts.
Timeouts Timeouts `yaml:"timeouts"`
}

type AuthModule struct {
Expand All @@ -54,6 +58,11 @@ type UserPass struct {
Password string `yaml:"password"`
}

// Timeouts holds the timeout settings used to derive a per-collector context
// (see the Context method).
type Timeouts struct {
// Default is the timeout used for any collector that has no entry in
// Collectors. A zero value disables the timeout.
Default time.Duration `yaml:"default"`
// Collectors maps a collector name to the timeout for that collector. An
// entry here is used instead of Default, including an entry of zero, which
// disables the timeout for that collector.
Collectors map[string]time.Duration `yaml:"collectors"`
}

type Handler struct {
sync.RWMutex
Config *Config
Expand Down Expand Up @@ -118,3 +127,14 @@ func (m AuthModule) ConfigureTarget(target string) (DSN, error) {

return dsn, nil
}

// Context derives a context for running the named collector, bounded by the
// timeout configured for it.
//
// The timeout is looked up in t.Collectors by collector name; when the
// collector has no entry there, t.Default is used instead. An explicit
// per-collector entry always wins, so an entry of 0 disables the timeout for
// that collector even when a non-zero default is configured.
//
// A non-positive timeout means "disabled": the returned context is only
// cancellable and carries no deadline of its own. Negative values are handled
// the same way as zero because context.WithTimeout with a negative duration
// returns a context that is already expired, which would make the collector
// fail before it had a chance to run.
//
// The caller must always invoke the returned CancelFunc to release the
// resources associated with the context.
func (t Timeouts) Context(parent context.Context, collector string) (context.Context, context.CancelFunc) {
	timeout, ok := t.Collectors[collector]
	if !ok {
		// No collector-specific setting: fall back to the global default.
		timeout = t.Default
	}
	if timeout <= 0 {
		// Timeout disabled; still hand back a cancel func so callers can
		// treat both cases uniformly.
		return context.WithCancel(parent)
	}
	return context.WithTimeout(parent, timeout)
}
4 changes: 4 additions & 0 deletions config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ func TestLoadBadConfigs(t *testing.T) {
input: "testdata/config-bad-extra-field.yaml",
want: "error parsing config file \"testdata/config-bad-extra-field.yaml\": yaml: unmarshal errors:\n line 8: field doesNotExist not found in type config.AuthModule",
},
{
input: "testdata/config-bad-timeout-duration.yaml",
want: "error parsing config file \"testdata/config-bad-timeout-duration.yaml\": yaml: unmarshal errors:\n line 10: cannot unmarshal !!str `not a time` into time.Duration",
},
}

for _, test := range tests {
Expand Down
10 changes: 10 additions & 0 deletions config/testdata/config-bad-timeout-duration.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
auth_modules:
first:
type: userpass
userpass:
username: first
password: firstpass
options:
sslmode: disable
timeouts:
default: not a time
4 changes: 4 additions & 0 deletions config/testdata/config-good.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,7 @@ auth_modules:
password: firstpass
options:
sslmode: disable
timeouts:
default: 5s
collectors:
example: 10s