From 5c893b05d3c7952cbb94e9cbfd35d55894c678ae Mon Sep 17 00:00:00 2001 From: Sergei Klochkov Date: Thu, 11 Sep 2025 16:33:32 +0100 Subject: [PATCH 1/2] feat(collector): add sys user_summary by statement latency/type; clamp negative memory; unit tests Add two new collectors: - --collect.sys.user_summary_by_statement_latency - --collect.sys.user_summary_by_statement_type Both follow the existing sys collector patterns: - metric names: mysql_sys_user_summary_by_statement_{latency|type}_* - label sets: {user} and {user,statement} - latencies converted from picoseconds to seconds (picoSeconds) - exported as gauges Also harden --collect.sys.user_summary against negative memory values observed on MySQL 8.x by clamping: GREATEST(current_memory, 0) AS current_memory GREATEST(total_memory_allocated, 0) AS total_memory_allocated Tests: - Update sys_user_summary_test.go: * robust SQL regex match * channel-read guard (no nil deref) * column name typo fixed ("statements") * expected values aligned with SQL-side clamping - Add sys_user_summary_by_statement_latency_test.go - Add sys_user_summary_by_statement_type_test.go Notes: - No changes to default enablement; flags must be passed explicitly. - Metric help strings note seconds for latency metrics. Verification: - go test ./collector -run UserSummary - go test ./... (full) Signed-off-by: Sergei Klochkov --- README.md | 2 + collector/sys_user_summary.go | 4 +- .../sys_user_summary_by_statement_latency.go | 120 ++++++++++++++++++ ..._user_summary_by_statement_latency_test.go | 115 +++++++++++++++++ .../sys_user_summary_by_statement_type.go | 119 +++++++++++++++++ ...sys_user_summary_by_statement_type_test.go | 116 +++++++++++++++++ collector/sys_user_summary_test.go | 8 +- mysqld_exporter.go | 2 + 8 files changed, 481 insertions(+), 5 deletions(-) create mode 100644 collector/sys_user_summary_by_statement_latency.go create mode 100644 collector/sys_user_summary_by_statement_latency_test.go create mode 100644 collector/sys_user_summary_by_statement_type.go create mode 100644 collector/sys_user_summary_by_statement_type_test.go diff --git a/README.md b/README.md index 8c0bbf3f6..dedbb70a4 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,8 @@ collect.perf_schema.replication_applier_status_by_worker | 5.7 | C collect.slave_status | 5.1 | Collect from SHOW SLAVE STATUS (Enabled by default) collect.slave_hosts | 5.1 | Collect from SHOW SLAVE HOSTS collect.sys.user_summary | 5.7 | Collect metrics from sys.x$user_summary (disabled by default). +collect.sys.user_summary_by_statement_latency | 5.7 | Collect metrics from sys.x$user_summary_by_statement_latency (disabled by default). +collect.sys.user_summary_by_statement_type | 5.7 | Collects metrics from sys.x$user_summary_by_statement_type (disabled by default). ### General Flags diff --git a/collector/sys_user_summary.go b/collector/sys_user_summary.go index e4d38ff1b..20fdeef85 100644 --- a/collector/sys_user_summary.go +++ b/collector/sys_user_summary.go @@ -31,8 +31,8 @@ const sysUserSummaryQuery = ` current_connections, total_connections, unique_hosts, - current_memory, - total_memory_allocated + GREATEST(current_memory, 0) AS current_memory, + GREATEST(total_memory_allocated, 0) AS total_memory_allocated FROM ` + sysSchema + `.x$user_summary ` diff --git a/collector/sys_user_summary_by_statement_latency.go b/collector/sys_user_summary_by_statement_latency.go new file mode 100644 index 000000000..9493bef2f --- /dev/null +++ b/collector/sys_user_summary_by_statement_latency.go @@ -0,0 +1,120 @@ +package collector + +import ( + "context" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" +) + +type ScrapeSysUserSummaryByStatementLatency struct{} + +func (ScrapeSysUserSummaryByStatementLatency) Name() string { + return "sys.user_summary_by_statement_latency" +} +func (ScrapeSysUserSummaryByStatementLatency) Help() string { + return "Collect metrics from sys.x$user_summary_by_statement_latency." +} +func (ScrapeSysUserSummaryByStatementLatency) Version() float64 { return 5.7 } + +// Metric name stem to match sys_user_summary.go style. +const userSummaryByStmtLatencyStem = "user_summary_by_statement_latency" + +// Descriptors (namespace=sys schema; names include the stem above). +var ( + sysUSSBLStatementsTotal = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtLatencyStem+"_total"), + "The total number of statements for the user.", + []string{"user"}, nil, + ) + sysUSSBLTotalLatency = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtLatencyStem+"_latency"), + "The total wait time of timed statements for the user (seconds).", + []string{"user"}, nil, + ) + sysUSSBLMaxLatency = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtLatencyStem+"_max_latency"), + "The maximum single-statement latency for the user (seconds).", + []string{"user"}, nil, + ) + sysUSSBLLockLatency = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtLatencyStem+"_lock_latency"), + "The total time spent waiting for locks for the user (seconds).", + []string{"user"}, nil, + ) + sysUSSBLCpuLatency = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtLatencyStem+"_cpu_latency"), + "The total CPU time spent by statements for the user (seconds).", + []string{"user"}, nil, + ) + sysUSSBLRowsSent = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtLatencyStem+"_rows_sent_total"), + "The total number of rows sent by statements for the user.", + []string{"user"}, nil, + ) + sysUSSBLRowsExamined = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtLatencyStem+"_rows_examined_total"), + "The total number of rows examined by statements for the user.", + []string{"user"}, nil, + ) + sysUSSBLRowsAffected = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtLatencyStem+"_rows_affected_total"), + "The total number of rows affected by statements for the user.", + []string{"user"}, nil, + ) + sysUSSBLFullScans = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtLatencyStem+"_full_scans_total"), + "The total number of full table scans by statements for the user.", + []string{"user"}, nil, + ) +) + +func (ScrapeSysUserSummaryByStatementLatency) Scrape( + ctx context.Context, + inst *instance, + ch chan<- prometheus.Metric, + _ *slog.Logger, +) error { + const q = ` +SELECT + user, + total, + total_latency, + max_latency, + lock_latency, + cpu_latency, + rows_sent, + rows_examined, + rows_affected, + full_scans +FROM sys.x$user_summary_by_statement_latency` + + rows, err := inst.db.QueryContext(ctx, q) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var ( + user string + total uint64 + totalPs, maxPs, lockPs, cpuPs uint64 + rowsSent, rowsExam, rowsAff, fscs uint64 + ) + if err := rows.Scan(&user, &total, &totalPs, &maxPs, &lockPs, &cpuPs, &rowsSent, &rowsExam, &rowsAff, &fscs); err != nil { + return err + } + + ch <- prometheus.MustNewConstMetric(sysUSSBLStatementsTotal, prometheus.GaugeValue, float64(total), user) + ch <- prometheus.MustNewConstMetric(sysUSSBLTotalLatency, prometheus.GaugeValue, float64(totalPs)/picoSeconds, user) + ch <- prometheus.MustNewConstMetric(sysUSSBLMaxLatency, prometheus.GaugeValue, float64(maxPs)/picoSeconds, user) + ch <- prometheus.MustNewConstMetric(sysUSSBLLockLatency, prometheus.GaugeValue, float64(lockPs)/picoSeconds, user) + ch <- prometheus.MustNewConstMetric(sysUSSBLCpuLatency, prometheus.GaugeValue, float64(cpuPs)/picoSeconds, user) + ch <- prometheus.MustNewConstMetric(sysUSSBLRowsSent, prometheus.GaugeValue, float64(rowsSent), user) + ch <- prometheus.MustNewConstMetric(sysUSSBLRowsExamined, prometheus.GaugeValue, float64(rowsExam), user) + ch <- prometheus.MustNewConstMetric(sysUSSBLRowsAffected, prometheus.GaugeValue, float64(rowsAff), user) + ch <- prometheus.MustNewConstMetric(sysUSSBLFullScans, prometheus.GaugeValue, float64(fscs), user) + } + return rows.Err() +} diff --git a/collector/sys_user_summary_by_statement_latency_test.go b/collector/sys_user_summary_by_statement_latency_test.go new file mode 100644 index 000000000..f488006c8 --- /dev/null +++ b/collector/sys_user_summary_by_statement_latency_test.go @@ -0,0 +1,115 @@ +package collector + +import ( + "context" + "database/sql/driver" + "strconv" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/promslog" + "github.com/smartystreets/goconvey/convey" +) + +func TestScrapeSysUserSummaryByStatementLatency(t *testing.T) { + // Sanity check + if (ScrapeSysUserSummaryByStatementLatency{}).Name() != "sys.user_summary_by_statement_latency" { + t.Fatalf("unexpected Name()") + } + + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("error opening a stub database connection: %s", err) + } + defer db.Close() + inst := &instance{db: db} + + columns := []string{ + "user", + "total", + "total_latency", + "max_latency", + "lock_latency", + "cpu_latency", + "rows_sent", + "rows_examined", + "rows_affected", + "full_scans", + } + rows := sqlmock.NewRows(columns) + + queryResults := [][]driver.Value{ + // user, total, total_latency(ps), max_latency(ps), lock_latency(ps), cpu_latency(ps), rows_sent, rows_examined, rows_affected, full_scans + {"app", "10", "120", "300", "40", "50", "1000", "2000", "300", "7"}, + {"background", "2", "0", "0", "0", "0", "0", "0", "0", "0"}, + } + for _, r := range queryResults { + rows.AddRow(r...) + } + + // Pass regex as STRING (raw literal); sqlmock compiles it internally. + mock.ExpectQuery(`(?s)SELECT\s+.*\s+FROM\s+sys\.x\$user_summary_by_statement_latency\s*`). + WillReturnRows(rows) + + // Expected metrics (emission order per row) + expected := []MetricResult{} + for _, r := range queryResults { + u := r[0].(string) + parse := func(s string) float64 { + f, err := strconv.ParseFloat(s, 64) + if err != nil { + t.Fatalf("parse error: %v", err) + } + return f + } + total := parse(r[1].(string)) + totalLat := parse(r[2].(string)) / picoSeconds + maxLat := parse(r[3].(string)) / picoSeconds + lockLat := parse(r[4].(string)) / picoSeconds + cpuLat := parse(r[5].(string)) / picoSeconds + rowsSent := parse(r[6].(string)) + rowsExam := parse(r[7].(string)) + rowsAff := parse(r[8].(string)) + fullScans := parse(r[9].(string)) + + lbl := labelMap{"user": u} + mt := dto.MetricType_GAUGE + + expected = append(expected, + MetricResult{labels: lbl, value: total, metricType: mt}, + MetricResult{labels: lbl, value: totalLat, metricType: mt}, + MetricResult{labels: lbl, value: maxLat, metricType: mt}, + MetricResult{labels: lbl, value: lockLat, metricType: mt}, + MetricResult{labels: lbl, value: cpuLat, metricType: mt}, + MetricResult{labels: lbl, value: rowsSent, metricType: mt}, + MetricResult{labels: lbl, value: rowsExam, metricType: mt}, + MetricResult{labels: lbl, value: rowsAff, metricType: mt}, + MetricResult{labels: lbl, value: fullScans, metricType: mt}, + ) + } + + ch := make(chan prometheus.Metric) + go func() { + if err := (ScrapeSysUserSummaryByStatementLatency{}).Scrape(context.Background(), inst, ch, promslog.NewNopLogger()); err != nil { + t.Errorf("scrape error: %s", err) + } + close(ch) + }() + + convey.Convey("Metrics comparison (user_summary_by_statement_latency)", t, func() { + for i, exp := range expected { + m, ok := <-ch + if !ok { + t.Fatalf("metrics channel closed early at index %d", i) + } + got := readMetric(m) + convey.So(exp, convey.ShouldResemble, got) + } + }) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet SQL expectations: %s", err) + } +} diff --git a/collector/sys_user_summary_by_statement_type.go b/collector/sys_user_summary_by_statement_type.go new file mode 100644 index 000000000..dd11c9137 --- /dev/null +++ b/collector/sys_user_summary_by_statement_type.go @@ -0,0 +1,119 @@ +package collector + +import ( + "context" + "log/slog" + + "github.com/prometheus/client_golang/prometheus" +) + +type ScrapeSysUserSummaryByStatementType struct{} + +func (ScrapeSysUserSummaryByStatementType) Name() string { return "sys.user_summary_by_statement_type" } +func (ScrapeSysUserSummaryByStatementType) Help() string { + return "Collect metrics from sys.x$user_summary_by_statement_type." +} +func (ScrapeSysUserSummaryByStatementType) Version() float64 { return 5.7 } + +// Metric name stem to match sys_user_summary.go style. +const userSummaryByStmtTypeStem = "user_summary_by_statement_type" + +// Descriptors. +var ( + sysUSSTStatementsTotal = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtTypeStem+"_total"), + "The total number of occurrences of the statement type for the user.", + []string{"user", "statement"}, nil, + ) + sysUSSTTotalLatency = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtTypeStem+"_latency"), + "The total wait time of timed occurrences for the user and statement type (seconds).", + []string{"user", "statement"}, nil, + ) + sysUSSTMaxLatency = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtTypeStem+"_max_latency"), + "The maximum single-statement latency for the user and statement type (seconds).", + []string{"user", "statement"}, nil, + ) + sysUSSTLockLatency = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtTypeStem+"_lock_latency"), + "The total time spent waiting for locks for the user and statement type (seconds).", + []string{"user", "statement"}, nil, + ) + sysUSSTCpuLatency = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtTypeStem+"_cpu_latency"), + "The total CPU time for the user and statement type (seconds).", + []string{"user", "statement"}, nil, + ) + sysUSSTRowsSent = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtTypeStem+"_rows_sent_total"), + "The total number of rows sent for the user and statement type.", + []string{"user", "statement"}, nil, + ) + sysUSSTRowsExamined = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtTypeStem+"_rows_examined_total"), + "The total number of rows examined for the user and statement type.", + []string{"user", "statement"}, nil, + ) + sysUSSTRowsAffected = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtTypeStem+"_rows_affected_total"), + "The total number of rows affected for the user and statement type.", + []string{"user", "statement"}, nil, + ) + sysUSSTFullScans = prometheus.NewDesc( + prometheus.BuildFQName(namespace, sysSchema, userSummaryByStmtTypeStem+"_full_scans_total"), + "The total number of full table scans for the user and statement type.", + []string{"user", "statement"}, nil, + ) +) + +func (ScrapeSysUserSummaryByStatementType) Scrape( + ctx context.Context, + inst *instance, + ch chan<- prometheus.Metric, + _ *slog.Logger, +) error { + const q = ` +SELECT + user, + statement, + total, + total_latency, + max_latency, + lock_latency, + cpu_latency, + rows_sent, + rows_examined, + rows_affected, + full_scans +FROM sys.x$user_summary_by_statement_type` + + rows, err := inst.db.QueryContext(ctx, q) + if err != nil { + return err + } + defer rows.Close() + + for rows.Next() { + var ( + user, stmt string + total uint64 + totalPs, maxPs, lockPs, cpuPs uint64 + rowsSent, rowsExam, rowsAff, fscs uint64 + ) + if err := rows.Scan(&user, &stmt, &total, &totalPs, &maxPs, &lockPs, &cpuPs, &rowsSent, &rowsExam, &rowsAff, &fscs); err != nil { + return err + } + + ch <- prometheus.MustNewConstMetric(sysUSSTStatementsTotal, prometheus.GaugeValue, float64(total), user, stmt) + ch <- prometheus.MustNewConstMetric(sysUSSTTotalLatency, prometheus.GaugeValue, float64(totalPs)/picoSeconds, user, stmt) + ch <- prometheus.MustNewConstMetric(sysUSSTMaxLatency, prometheus.GaugeValue, float64(maxPs)/picoSeconds, user, stmt) + ch <- prometheus.MustNewConstMetric(sysUSSTLockLatency, prometheus.GaugeValue, float64(lockPs)/picoSeconds, user, stmt) + ch <- prometheus.MustNewConstMetric(sysUSSTCpuLatency, prometheus.GaugeValue, float64(cpuPs)/picoSeconds, user, stmt) + ch <- prometheus.MustNewConstMetric(sysUSSTRowsSent, prometheus.GaugeValue, float64(rowsSent), user, stmt) + ch <- prometheus.MustNewConstMetric(sysUSSTRowsExamined, prometheus.GaugeValue, float64(rowsExam), user, stmt) + ch <- prometheus.MustNewConstMetric(sysUSSTRowsAffected, prometheus.GaugeValue, float64(rowsAff), user, stmt) + ch <- prometheus.MustNewConstMetric(sysUSSTFullScans, prometheus.GaugeValue, float64(fscs), user, stmt) + } + return rows.Err() +} diff --git a/collector/sys_user_summary_by_statement_type_test.go b/collector/sys_user_summary_by_statement_type_test.go new file mode 100644 index 000000000..a5ced88a1 --- /dev/null +++ b/collector/sys_user_summary_by_statement_type_test.go @@ -0,0 +1,116 @@ +package collector + +import ( + "context" + "database/sql/driver" + "strconv" + "testing" + + "github.com/DATA-DOG/go-sqlmock" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + "github.com/prometheus/common/promslog" + "github.com/smartystreets/goconvey/convey" +) + +func TestScrapeSysUserSummaryByStatementType(t *testing.T) { + if (ScrapeSysUserSummaryByStatementType{}).Name() != "sys.user_summary_by_statement_type" { + t.Fatalf("unexpected Name()") + } + + db, mock, err := sqlmock.New() + if err != nil { + t.Fatalf("error opening a stub database connection: %s", err) + } + defer db.Close() + inst := &instance{db: db} + + columns := []string{ + "user", + "statement", + "total", + "total_latency", + "max_latency", + "lock_latency", + "cpu_latency", + "rows_sent", + "rows_examined", + "rows_affected", + "full_scans", + } + rows := sqlmock.NewRows(columns) + + queryResults := [][]driver.Value{ + // user, statement, total, total_latency(ps), max_latency(ps), lock_latency(ps), cpu_latency(ps), + // rows_sent, rows_examined, rows_affected, full_scans + {"app", "SELECT", "5", "100", "200", "10", "20", "500", "1000", "200", "3"}, + {"app", "INSERT", "2", "50", "80", "5", "8", "50", "0", "2", "0"}, + } + for _, r := range queryResults { + rows.AddRow(r...) + } + + mock.ExpectQuery(`(?s)SELECT\s+.*\s+FROM\s+sys\.x\$user_summary_by_statement_type\s*`). + WillReturnRows(rows) + + expected := []MetricResult{} + for _, r := range queryResults { + user := r[0].(string) + stmt := r[1].(string) + parse := func(s string) float64 { + f, err := strconv.ParseFloat(s, 64) + if err != nil { + t.Fatalf("parse error: %v", err) + } + return f + } + + total := parse(r[2].(string)) + totalLat := parse(r[3].(string)) / picoSeconds + maxLat := parse(r[4].(string)) / picoSeconds + lockLat := parse(r[5].(string)) / picoSeconds + cpuLat := parse(r[6].(string)) / picoSeconds + rowsSent := parse(r[7].(string)) + rowsExam := parse(r[8].(string)) + rowsAff := parse(r[9].(string)) + fullScans := parse(r[10].(string)) + + lbl := labelMap{"user": user, "statement": stmt} + mt := dto.MetricType_GAUGE + + expected = append(expected, + MetricResult{labels: lbl, value: total, metricType: mt}, + MetricResult{labels: lbl, value: totalLat, metricType: mt}, + MetricResult{labels: lbl, value: maxLat, metricType: mt}, + MetricResult{labels: lbl, value: lockLat, metricType: mt}, + MetricResult{labels: lbl, value: cpuLat, metricType: mt}, + MetricResult{labels: lbl, value: rowsSent, metricType: mt}, + MetricResult{labels: lbl, value: rowsExam, metricType: mt}, + MetricResult{labels: lbl, value: rowsAff, metricType: mt}, + MetricResult{labels: lbl, value: fullScans, metricType: mt}, + ) + } + + ch := make(chan prometheus.Metric) + go func() { + if err := (ScrapeSysUserSummaryByStatementType{}).Scrape(context.Background(), inst, ch, promslog.NewNopLogger()); err != nil { + t.Errorf("scrape error: %s", err) + } + close(ch) + }() + + convey.Convey("Metrics comparison (user_summary_by_statement_type)", t, func() { + for i, exp := range expected { + m, ok := <-ch + if !ok { + t.Fatalf("metrics channel closed early at index %d", i) + } + got := readMetric(m) + convey.So(exp, convey.ShouldResemble, got) + } + }) + + if err := mock.ExpectationsWereMet(); err != nil { + t.Errorf("unmet SQL expectations: %s", err) + } +} diff --git a/collector/sys_user_summary_test.go b/collector/sys_user_summary_test.go index 38d569f60..ba10314c2 100644 --- a/collector/sys_user_summary_test.go +++ b/collector/sys_user_summary_test.go @@ -16,7 +16,6 @@ package collector import ( "context" "database/sql/driver" - "regexp" "strconv" "testing" @@ -38,7 +37,7 @@ func TestScrapeSysUserSummary(t *testing.T) { columns := []string{ "user", - "statemets", + "statements", "statement_latency", "table_scans", "file_ios", @@ -100,6 +99,9 @@ func TestScrapeSysUserSummary(t *testing.T) { if i == 2 || i == 5 { value = value / picoSeconds } + if (i == 9 || i == 10) && value < 0 { + value = 0 + } expectedMetrics = append(expectedMetrics, MetricResult{ labels: labelMap{"user": user.(string)}, value: value, @@ -108,7 +110,7 @@ func TestScrapeSysUserSummary(t *testing.T) { } } - mock.ExpectQuery(sanitizeQuery(regexp.QuoteMeta(sysUserSummaryQuery))).WillReturnRows(rows) + mock.ExpectQuery(`(?s)SELECT\s+.*\s+FROM\s+sys\.x\$user_summary\s*`).WillReturnRows(rows) ch := make(chan prometheus.Metric) diff --git a/mysqld_exporter.go b/mysqld_exporter.go index 5e09320bc..7791eb70f 100644 --- a/mysqld_exporter.go +++ b/mysqld_exporter.go @@ -104,6 +104,8 @@ var scrapers = map[collector.Scraper]bool{ collector.ScrapePerfReplicationGroupMemberStats{}: false, collector.ScrapePerfReplicationApplierStatsByWorker{}: false, collector.ScrapeSysUserSummary{}: false, + collector.ScrapeSysUserSummaryByStatementLatency{}: false, + collector.ScrapeSysUserSummaryByStatementType{}: false, collector.ScrapeUserStat{}: false, collector.ScrapeClientStat{}: false, collector.ScrapeTableStat{}: false, From e627d71f0b10da9488d3213e03eaec436525d6f3 Mon Sep 17 00:00:00 2001 From: Sergei Klochkov Date: Thu, 11 Sep 2025 16:34:42 +0100 Subject: [PATCH 2/2] test(integration): add Compose-based MySQL harness and collector test runner; docs Add a self-contained integration test that spins up MySQL 8.4 with performance_schema, seeds a small workload, and validates collectors via a locally-built exporter image. What's included: - docker-compose.yml (fixed network name: mysql-test) - mysql/conf.d/perf-schema.cnf - mysql/initdb/01-users.sql (exporter/app users, grants) - seed/seed.sh (INSERT/SELECT/UPDATE/SLEEP loop) - test_compose_collectors.sh: * builds mysqld-exporter:local * ensures users (caching_sha2_password) * runs exporter per --collect.* flag (no host port binding) * curls /metrics from inside the network * per-flag log files under ./_testlogs/ * robust readiness + failure diagnostics * optional auto-discovery of collector flags from --help Docs (README.md): - Document new flags: --collect.sys.user_summary_by_statement_latency --collect.sys.user_summary_by_statement_type * list emitted metric families and units - Note clamping behavior in --collect.sys.user_summary - Add "Docker Compose integration test" section with usage Usage: ./test_compose_collectors.sh Artifacts: _testlogs/exporter_.log License: Added the mandatory license comments to the newly-committed source files. This commit touches only test infra and docs; no exporter runtime code changes. Signed-off-by: Sergei Klochkov --- .gitignore | 1 + README.md | 24 ++ .../sys_user_summary_by_statement_latency.go | 13 + ..._user_summary_by_statement_latency_test.go | 13 + .../sys_user_summary_by_statement_type.go | 13 + ...sys_user_summary_by_statement_type_test.go | 13 + collector/sys_user_summary_test.go | 4 +- docker-compose.yml | 43 +++ test-files/mysql/conf.d/perf-schema.cnf | 6 + test-files/mysql/initdb/01-users.sql | 11 + test-files/seed/seed.sh | 45 ++++ test_compose_collectors.sh | 250 ++++++++++++++++++ 12 files changed, 434 insertions(+), 2 deletions(-) create mode 100644 docker-compose.yml create mode 100644 test-files/mysql/conf.d/perf-schema.cnf create mode 100644 test-files/mysql/initdb/01-users.sql create mode 100755 test-files/seed/seed.sh create mode 100755 test_compose_collectors.sh diff --git a/.gitignore b/.gitignore index 5c3bd96be..6d52ba6e7 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ .idea *.iml /vendor +_testlogs/ diff --git a/README.md b/README.md index dedbb70a4..4579d2ceb 100644 --- a/README.md +++ b/README.md @@ -206,6 +206,30 @@ docker run -d \ prom/mysqld-exporter ``` +## Docker Compose integration test + +A self-contained test harness is included to validate collectors against a local MySQL: + +- Spins up **MySQL 8.4** with `performance_schema` on +- Seeds basic workload so `sys` summaries have data +- Builds and runs your **local** exporter image per collector flag +- Captures exporter logs per test under `_testlogs/` +- Verifies metrics via in-network HTTP (no host port binding) + +**Prereqs:** Docker & Docker Compose v2. + +**Files:** +- `docker-compose.yml` (MySQL service + one-shot seeder) +- `mysql/conf.d/perf-schema.cnf` (ensures P_S consumers on) +- `mysql/initdb/01-users.sql` (creates `exporter` & `app`; grants) +- `seed/seed.sh` (simple INSERT/SELECT/UPDATE/SLEEP loop) +- `test_compose_collectors.sh` (runner) + +**Run:** +```bash +./test_compose_collectors.sh +``` + ## heartbeat With `collect.heartbeat` enabled, mysqld_exporter will scrape replication delay diff --git a/collector/sys_user_summary_by_statement_latency.go b/collector/sys_user_summary_by_statement_latency.go index 9493bef2f..a847357e7 100644 --- a/collector/sys_user_summary_by_statement_latency.go +++ b/collector/sys_user_summary_by_statement_latency.go @@ -1,3 +1,16 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package collector import ( diff --git a/collector/sys_user_summary_by_statement_latency_test.go b/collector/sys_user_summary_by_statement_latency_test.go index f488006c8..b43d4a92d 100644 --- a/collector/sys_user_summary_by_statement_latency_test.go +++ b/collector/sys_user_summary_by_statement_latency_test.go @@ -1,3 +1,16 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package collector import ( diff --git a/collector/sys_user_summary_by_statement_type.go b/collector/sys_user_summary_by_statement_type.go index dd11c9137..88cd8b22f 100644 --- a/collector/sys_user_summary_by_statement_type.go +++ b/collector/sys_user_summary_by_statement_type.go @@ -1,3 +1,16 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package collector import ( diff --git a/collector/sys_user_summary_by_statement_type_test.go b/collector/sys_user_summary_by_statement_type_test.go index a5ced88a1..dd8c8f5ac 100644 --- a/collector/sys_user_summary_by_statement_type_test.go +++ b/collector/sys_user_summary_by_statement_type_test.go @@ -1,3 +1,16 @@ +// Copyright 2022 The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package collector import ( diff --git a/collector/sys_user_summary_test.go b/collector/sys_user_summary_test.go index ba10314c2..f245a9814 100644 --- a/collector/sys_user_summary_test.go +++ b/collector/sys_user_summary_test.go @@ -100,8 +100,8 @@ func TestScrapeSysUserSummary(t *testing.T) { value = value / picoSeconds } if (i == 9 || i == 10) && value < 0 { - value = 0 - } + value = 0 + } expectedMetrics = append(expectedMetrics, MetricResult{ labels: labelMap{"user": user.(string)}, value: value, diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 000000000..1283de50c --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,43 @@ +services: + mysql: + image: mysql:8.4 + environment: + MYSQL_ROOT_PASSWORD: rootpass + MYSQL_DATABASE: testdb + MYSQL_USER: app + MYSQL_PASSWORD: app + command: ["--performance_schema=ON"] + healthcheck: + test: ["CMD-SHELL", "mysqladmin ping -h 127.0.0.1 -uroot -prootpass || exit 1"] + interval: 5s + timeout: 3s + retries: 30 + volumes: + - ./test-files/mysql/conf.d:/etc/mysql/conf.d:ro + - ./test-files/mysql/initdb:/docker-entrypoint-initdb.d:ro + networks: + mysql-test: + aliases: + - mysql # ensure 'mysql' DNS name exists on this network + + seed: + image: mysql:8.4 + depends_on: + mysql: + condition: service_healthy + environment: + MYSQL_HOST: mysql + MYSQL_USER: app + MYSQL_PASSWORD: app + MYSQL_DATABASE: testdb + volumes: + - ./test-files/seed/seed.sh:/seed/seed.sh:ro + entrypoint: ["/bin/bash", "/seed/seed.sh"] + networks: [mysql-test] + +networks: + mysql-test: + name: mysql-test + driver: bridge + external: true + diff --git a/test-files/mysql/conf.d/perf-schema.cnf b/test-files/mysql/conf.d/perf-schema.cnf new file mode 100644 index 000000000..5c52a3a22 --- /dev/null +++ b/test-files/mysql/conf.d/perf-schema.cnf @@ -0,0 +1,6 @@ +[mysqld] +performance_schema=ON +# The following consumers are typically on by default in 8.x; keeping explicit for clarity. +performance_schema_consumer_events_statements_history=ON +performance_schema_consumer_events_statements_history_long=ON +performance_schema_consumer_events_statements_current=ON diff --git a/test-files/mysql/initdb/01-users.sql b/test-files/mysql/initdb/01-users.sql new file mode 100644 index 000000000..5d0b96956 --- /dev/null +++ b/test-files/mysql/initdb/01-users.sql @@ -0,0 +1,11 @@ +CREATE USER IF NOT EXISTS 'exporter'@'%' IDENTIFIED BY 'exporter'; +-- Force the plugin in case the user existed from a previous run with a different plugin +ALTER USER 'exporter'@'%' IDENTIFIED WITH caching_sha2_password BY 'exporter'; +GRANT PROCESS, REPLICATION CLIENT, SELECT ON *.* TO 'exporter'@'%'; + +-- App user for seeding +CREATE USER IF NOT EXISTS 'app'@'%' IDENTIFIED BY 'app'; +CREATE DATABASE IF NOT EXISTS testdb; +GRANT ALL PRIVILEGES ON testdb.* TO 'app'@'%'; + +FLUSH PRIVILEGES; diff --git a/test-files/seed/seed.sh b/test-files/seed/seed.sh new file mode 100755 index 000000000..f6f07faa5 --- /dev/null +++ b/test-files/seed/seed.sh @@ -0,0 +1,45 @@ +#!/usr/bin/env bash +set -euo pipefail + +host="${MYSQL_HOST:-mysql}" +user="${MYSQL_USER:-app}" +pass="${MYSQL_PASSWORD:-app}" +db="${MYSQL_DATABASE:-testdb}" + +# Helper to run mysql client quietly +mysqlq() { + mysql -h "$host" -u "$user" "-p$pass" -D "$db" -ss -N -e "$1" >/dev/null 2>&1 +} + +echo "Seed: waiting for DNS for host '${host}'…" +for i in {1..60}; do + if getent hosts "$host" > /dev/null 2>&1; then + echo "Seed: DNS OK ($(getent hosts "$host" | awk '{print $1}' | head -n1))" + break + fi + sleep 1 + [[ $i -eq 60 ]] && { echo "Seed: DNS for '$host' not found"; exit 1; } +done + +echo "Seed: waiting for MySQL TCP on ${host}:3306…" +for i in {1..60}; do + if mysqladmin ping -h "$host" -u"$user" -p"$pass" --silent 2>/dev/null; then + echo "Seed: MySQL is up." + break + fi + sleep 1 + [[ $i -eq 60 ]] && { echo "Seed: MySQL not reachable"; exit 1; } +done + +# Ensure table exists (idempotent) +mysqlq "CREATE TABLE IF NOT EXISTS t1 (id INT PRIMARY KEY AUTO_INCREMENT, v INT, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP) ENGINE=InnoDB;" + +# Generate some mixed statements under the 'app' user +for i in $(seq 1 200); do + mysqlq "INSERT INTO t1 (v) VALUES (FLOOR(RAND()*1000));" + mysqlq "SELECT COUNT(*) FROM t1;" + mysqlq "UPDATE t1 SET v = v + 1 WHERE id % 10 = 0;" + mysqlq "SELECT SLEEP(0.01);" +done + +echo "Seed workload finished." diff --git a/test_compose_collectors.sh b/test_compose_collectors.sh new file mode 100755 index 000000000..a518d0efa --- /dev/null +++ b/test_compose_collectors.sh @@ -0,0 +1,250 @@ +#!/usr/bin/env bash +set -euo pipefail + +# No host port mapping anymore — we query the exporter from inside the docker network. +IMAGE_TAG="mysqld-exporter:local" +NETWORK_NAME="mysql-test" +MYSQL_ADDR="mysql:3306" +EXPORTER_NAME="mysqld_exporter_test_exporter" +METRICS_URL_NET="http://${EXPORTER_NAME}:9104/metrics" + +declare -A TESTS + +# Core +TESTS["collect.global_status"]="^mysql_global_status_" +TESTS["collect.global_variables"]="^mysql_global_variables_" + +# SYS schema +TESTS["collect.sys.user_summary"]="^mysql_sys_statements_total" +TESTS["collect.sys.user_summary_by_statement_latency"]="^mysql_sys_user_summary_by_statement_latency" +TESTS["collect.sys.user_summary_by_statement_type"]="^mysql_sys_user_summary_by_statement_type" + +# INFORMATION_SCHEMA +TESTS["collect.info_schema.innodb_cmp"]="^mysql_info_schema_innodb_cmp" +TESTS["collect.info_schema.innodb_cmpmem"]="^mysql_info_schema_innodb_cmpmem" +TESTS["collect.info_schema.innodb_metrics"]="^mysql_info_schema_innodb_metrics" +TESTS["collect.info_schema.processlist"]="^mysql_info_schema_processlist_threads" + +# ENGINE / replication (will be present but may be empty on single-instance; still safe to grep family) +TESTS["collect.engine_innodb_status"]="^mysql_engine_innodb_queries_in_queue" + +# Performance Schema (commonly available on 5.7/8.x with P_S on) +TESTS["collect.perf_schema.eventsstatements"]="^mysql_perf_schema_events_statements_total" +TESTS["collect.perf_schema.eventsstatementssum"]="^mysql_perf_schema_events_statements_sum_total" + +LOG_DIR="${LOG_DIR:-./_testlogs}" +mkdir -p "${LOG_DIR}" + +log() { printf '%s\n' "$*" >&2; } +need() { command -v "$1" >/dev/null 2>&1 || { log "Missing: $1"; exit 1; }; } +compose() { if docker compose version >/dev/null 2>&1; then docker compose "$@"; else docker-compose "$@"; fi; } + +# Curl inside the docker network (avoids host port binds). +curl_in_net() { + docker run --rm --network "${NETWORK_NAME}" curlimages/curl:8.8.0 \ + curl -sS -m 3 -f "$1" +} + +wait_for_metrics_net() { + local url="$1" retries="${2:-30}" sleep_s="${3:-1}" + for _ in $(seq 1 "$retries"); do + if curl_in_net "$url" >/dev/null 2>&1; then return 0; fi + sleep "$sleep_s" + done + return 1 +} + +dns_probe_mysql() { + for _ in $(seq 1 30); do + if docker run --rm --network "${NETWORK_NAME}" busybox:1.36 nslookup mysql >/dev/null 2>&1; then + return 0 + fi + sleep 1 + done + return 1 +} + +exporter_logs() { + local name="$1" + log "---- exporter container state ----" + docker inspect -f 'Name={{.Name}} Status={{.State.Status}} ExitCode={{.State.ExitCode}} OOMKilled={{.State.OOMKilled}} Error={{.State.Error}} StartedAt={{.State.StartedAt}} FinishedAt={{.State.FinishedAt}}' "$name" 2>/dev/null || true + log "---- exporter logs (last 200 lines) ----" + docker logs --tail=200 "$name" 2>&1 || true + log "----------------------------------------" +} + +mysql_logs() { compose logs --no-color --tail=200 mysql || true; } + +build_binary() { + export CGO_ENABLED=0 + mkdir -p .build/linux-amd64/ + go build -o .build/linux-amd64/mysqld_exporter +} + +build_local_image() { + log "▶ Building local exporter image: ${IMAGE_TAG}" + docker build -t "${IMAGE_TAG}" . +} + +up_stack() { + log "▶ Bringing up MySQL…" + compose up -d mysql + + log "⏳ Waiting for MySQL to be healthy…" + for _ in $(seq 1 60); do + st="$(docker inspect -f '{{.State.Health.Status}}' "$(compose ps -q mysql)" 2>/dev/null || echo "unknown")" + [[ "$st" == "healthy" ]] && break + sleep 1 + done + + log "⏳ Probing DNS for 'mysql' on '${NETWORK_NAME}'…" + dns_probe_mysql || { log "DNS for 'mysql' not resolving"; docker network inspect "${NETWORK_NAME}" || true; exit 1; } + + log "▶ Ensuring monitoring/app users and grants…" + compose exec -T mysql sh -lc ' + mysql -uroot -prootpass < "${path}" </dev/null 2>&1 || true + + local cnf_path; cnf_path="$(make_temp_cnf)" + + log "▶ Starting exporter with --${flag}" + local cid + cid="$( + docker run -d \ + --name "${EXPORTER_NAME}" \ + --network "${NETWORK_NAME}" \ + -v "${cnf_path}:/cfg/my.cnf:ro" \ + "${IMAGE_TAG}" \ + --web.listen-address=":9104" \ + --log.level=debug \ + --config.my-cnf=/cfg/my.cnf \ + --mysqld.address="${MYSQL_ADDR}" \ + --"${flag}" + )" + + # Tail logs to a local file (background) + LOG_FILE="${LOG_DIR}/exporter_${flag//./_}.log" + docker logs -f "${EXPORTER_NAME}" >"${LOG_FILE}" 2>&1 & + TAIL_PID="$!" + + # Wait for /metrics inside the docker network + if ! wait_for_metrics_net "${METRICS_URL_NET}" 30 1; then + echo "${flag}: FAIL (exporter did not become ready)" + log "---- exporter log file tail (${LOG_FILE}) ----" + tail -n 200 "${LOG_FILE}" 2>/dev/null || true + exporter_logs "${EXPORTER_NAME}" + log "---- mysql logs (tail) ----" + mysql_logs + return 1 + fi + + printf '%s' "$cid" +} + +stop_exporter_tail() { [[ -n "${TAIL_PID:-}" ]] && kill "${TAIL_PID}" >/dev/null 2>&1 || true; unset TAIL_PID; } + +smoke_test_mysql_auth() { + log "▶ Smoke-testing exporter credentials from client…" + docker run --rm --network "${NETWORK_NAME}" mysql:8.4 \ + mysql -h mysql -uexporter -pexporter -e 'SELECT @@version;' >/dev/null +} + +# Get metrics content from inside the network (avoids host port) +get_metrics_net() { + curl_in_net "${METRICS_URL_NET}" +} + +test_flag() { + local flag="$1" pattern="$2" + + local cid + if ! cid="$(run_exporter_with_flag "$flag")"; then + stop_exporter_tail + return 1 + fi + + LOG_FILE="${LOG_DIR}/exporter_${flag//./_}.log" + + # Pull metrics and test + if get_metrics_net | grep -E "${pattern}" >/dev/null; then + echo "${flag}: PASS" + stop_exporter_tail + docker rm -f "${EXPORTER_NAME}" >/dev/null 2>&1 || true + return 0 + else + echo "${flag}: FAIL (pattern not found: ${pattern})" + log "---- first 30 mysql_* metrics ----" + get_metrics_net | grep '^mysql_' | head -n 30 || true + log "---- exporter log file tail ----" + tail -n 200 "${LOG_FILE}" 2>/dev/null || true + exporter_logs "${EXPORTER_NAME}" + stop_exporter_tail + docker rm -f "${EXPORTER_NAME}" >/dev/null 2>&1 || true + return 1 + fi +} + +need docker; need curl +build_binary +build_local_image +up_stack + +if ! smoke_test_mysql_auth; then + log "Auth FAILED" + mysql_logs + down_stack + exit 1 +else + log "Auth OK" +fi + +pass=0 +fail=0 +for flag in ${!TESTS[@]}; do + if test_flag "$flag" "${TESTS[${flag}]}"; then + pass=$((pass+1)) + else + fail=$((fail+1)) + fi +done + +echo; echo "==== Summary ===="; echo "PASS: ${pass}"; echo "FAIL: ${fail}" +down_stack +[[ "$fail" -eq 0 ]] +