Skip to content

Commit d461c5a

Browse files
committed
In _data/v25.3/metrics/export, copied crdb_metrics.yaml, shared_metrics.yaml, and tenant_metrics.yaml from https://github.com/cockroachlabs/managed-service/tree/master/pkg/otel/assets.
In available-metrics-in-metrics-list.csv, added 6 rebalancing.* metrics from crdb_metrics.yaml. In metric-names.md, updated it to (a) use metrics.yaml for Description, Unit, and Type, and (b) use crdb_metrics.yaml, shared_metrics.yaml, and tenant_metrics.yaml for Supported Deployments.
1 parent a62b657 commit d461c5a

File tree

5 files changed

+386
-11
lines changed

5 files changed

+386
-11
lines changed

src/current/_data/v25.3/metrics/available-metrics-in-metrics-list.csv

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -476,4 +476,10 @@ auth.ldap.conn.latency
476476
auth.password.conn.latency
477477
auth.scram.conn.latency
478478
sql.exec.latency.detail
479-
sql.query.unique.count
479+
sql.query.unique.count
480+
rebalancing.cpunanospersecond
481+
rebalancing.lease.transfers
482+
rebalancing.range.rebalances
483+
rebalancing.replicas.cpunanospersecond
484+
rebalancing.replicas.queriespersecond
485+
rebalancing.state.imbalanced_overfull_options_exhausted
Lines changed: 220 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,220 @@
1+
# CrdbCluster-specific metrics. The metrics in this list will be combined with
2+
# the ones in shared_metrics.yaml.
3+
#
4+
# Each metric is mapped to its appropriate aggregation function (sum, max, or mean),
5+
# which is used to aggregate metrics by specific labels.
6+
metrics:
7+
addsstable_applications: sum
8+
addsstable_copies: sum
9+
addsstable_proposals: sum
10+
admission_wait_sum_kv: sum
11+
admission_wait_sum_kv_stores: sum
12+
admission_wait_sum_sql_kv_response: sum
13+
admission_wait_sum_sql_sql_response: sum
14+
capacity: max
15+
capacity_available: max
16+
capacity_reserved: max
17+
capacity_used: max
18+
exec_error: sum
19+
exec_latency: sum
20+
exec_success: sum
21+
gcbytesage: mean
22+
gossip_bytes_received: sum
23+
gossip_bytes_sent: sum
24+
gossip_connections_incoming: sum
25+
gossip_connections_outgoing: sum
26+
gossip_connections_refused: sum
27+
gossip_infos_received: sum
28+
gossip_infos_sent: sum
29+
intentage: mean
30+
intentbytes: sum
31+
intentcount: sum
32+
jobs_row_level_ttl_currently_paused: sum
33+
jobs_row_level_ttl_currently_running: sum
34+
jobs_row_level_ttl_delete_duration: sum
35+
jobs_row_level_ttl_num_active_spans: sum
36+
jobs_row_level_ttl_resume_completed: sum
37+
jobs_row_level_ttl_resume_failed: sum
38+
jobs_row_level_ttl_rows_deleted: sum
39+
jobs_row_level_ttl_rows_selected: sum
40+
jobs_row_level_ttl_select_duration: sum
41+
jobs_row_level_ttl_span_total_duration: sum
42+
jobs_row_level_ttl_total_expired_rows: sum
43+
jobs_row_level_ttl_total_rows: sum
44+
keybytes: sum
45+
keycount: sum
46+
leases_epoch: sum
47+
leases_error: sum
48+
leases_expiration: sum
49+
leases_success: sum
50+
leases_transfers_error: sum
51+
leases_transfers_success: sum
52+
livebytes: sum
53+
livecount: sum
54+
liveness_epochincrements: sum
55+
liveness_heartbeatfailures: sum
56+
liveness_heartbeatlatency: sum
57+
liveness_heartbeatsuccesses: sum
58+
liveness_livenodes: max
59+
physical_replication_logical_bytes: sum
60+
physical_replication_replicated_time_seconds: max
61+
physical_replication_sst_bytes: sum
62+
queue_consistency_pending: sum
63+
queue_consistency_process_failure: sum
64+
queue_consistency_process_success: sum
65+
queue_consistency_processingnanos: sum
66+
queue_gc_info_abortspanconsidered: sum
67+
queue_gc_info_abortspangcnum: sum
68+
queue_gc_info_abortspanscanned: sum
69+
queue_gc_info_intentsconsidered: sum
70+
queue_gc_info_intenttxns: sum
71+
queue_gc_info_numkeysaffected: sum
72+
queue_gc_info_pushtxn: sum
73+
queue_gc_info_resolvesuccess: sum
74+
queue_gc_info_resolvetotal: sum
75+
queue_gc_info_transactionspangcaborted: sum
76+
queue_gc_info_transactionspangccommitted: sum
77+
queue_gc_info_transactionspangcpending: sum
78+
queue_gc_info_transactionspanscanned: sum
79+
queue_gc_pending: sum
80+
queue_gc_process_failure: sum
81+
queue_gc_process_success: sum
82+
queue_gc_processingnanos: sum
83+
queue_raftlog_pending: sum
84+
queue_raftlog_process_failure: sum
85+
queue_raftlog_process_success: sum
86+
queue_raftlog_processingnanos: sum
87+
queue_raftsnapshot_pending: sum
88+
queue_raftsnapshot_process_failure: sum
89+
queue_raftsnapshot_process_success: sum
90+
queue_raftsnapshot_processingnanos: sum
91+
queue_replicagc_pending: sum
92+
queue_replicagc_process_failure: sum
93+
queue_replicagc_process_success: sum
94+
queue_replicagc_processingnanos: sum
95+
queue_replicagc_removereplica: sum
96+
queue_replicate_addreplica: sum
97+
queue_replicate_pending: sum
98+
queue_replicate_process_failure: sum
99+
queue_replicate_process_success: sum
100+
queue_replicate_processingnanos: sum
101+
queue_replicate_purgatory: sum
102+
queue_replicate_rebalancereplica: sum
103+
queue_replicate_removedeadreplica: sum
104+
queue_replicate_removereplica: sum
105+
queue_replicate_transferlease: sum
106+
queue_split_pending: sum
107+
queue_split_process_failure: sum
108+
queue_split_process_success: sum
109+
queue_split_processingnanos: sum
110+
queue_tsmaintenance_pending: sum
111+
queue_tsmaintenance_process_failure: sum
112+
queue_tsmaintenance_process_success: sum
113+
queue_tsmaintenance_processingnanos: sum
114+
raft_commandsapplied: sum
115+
raft_enqueued_pending: sum
116+
raft_heartbeats_pending: sum
117+
raft_process_commandcommit_latency: sum
118+
raft_process_logcommit_latency: sum
119+
raft_process_tickingnanos: sum
120+
raft_process_workingnanos: sum
121+
raft_rcvd_app: sum
122+
raft_rcvd_appresp: sum
123+
raft_rcvd_dropped: sum
124+
raft_rcvd_heartbeat: sum
125+
raft_rcvd_heartbeatresp: sum
126+
raft_rcvd_prevote: sum
127+
raft_rcvd_prevoteresp: sum
128+
raft_rcvd_prop: sum
129+
raft_rcvd_snap: sum
130+
raft_rcvd_timeoutnow: sum
131+
raft_rcvd_transferleader: sum
132+
raft_rcvd_vote: sum
133+
raft_rcvd_voteresp: sum
134+
raft_ticks: sum
135+
raftlog_behind: max
136+
raftlog_truncated: sum
137+
range_adds: sum
138+
range_raftleadertransfers: sum
139+
range_removes: sum
140+
range_snapshots_generated: sum
141+
range_splits: sum
142+
rangekeybytes: sum
143+
rangekeycount: sum
144+
ranges: sum
145+
ranges_overreplicated: sum
146+
ranges_unavailable: sum
147+
ranges_underreplicated: sum
148+
rangevalbytes: sum
149+
rangevalcount: sum
150+
rebalancing_cpunanospersecond: mean
151+
rebalancing_lease_transfers: sum
152+
rebalancing_queriespersecond: sum
153+
rebalancing_range_rebalances: sum
154+
rebalancing_readbytespersecond: sum
155+
rebalancing_readspersecond: sum
156+
rebalancing_replicas_cpunanospersecond: sum
157+
rebalancing_replicas_queriespersecond: sum
158+
rebalancing_requestspersecond: sum
159+
rebalancing_state_imbalanced_overfull_options_exhausted: sum
160+
rebalancing_writebytespersecond: sum
161+
rebalancing_writespersecond: sum
162+
replicas: sum
163+
replicas_leaders: sum
164+
replicas_leaders_not_leaseholders: sum
165+
replicas_leaseholders: sum
166+
replicas_quiescent: sum
167+
replicas_reserved: sum
168+
requests_backpressure_split: sum
169+
requests_slow_lease: sum
170+
requests_slow_raft: sum
171+
rocksdb_block_cache_hits: sum
172+
rocksdb_block_cache_misses: sum
173+
rocksdb_block_cache_pinned_usage: sum
174+
rocksdb_block_cache_usage: sum
175+
rocksdb_bloom_filter_prefix_checked: sum
176+
rocksdb_bloom_filter_prefix_useful: sum
177+
rocksdb_compactions: sum
178+
rocksdb_flushes: sum
179+
rocksdb_memtable_total_size: sum
180+
rocksdb_num_sstables: sum
181+
rocksdb_read_amplification: mean
182+
rocksdb_table_readers_mem_estimate: sum
183+
schedules_scheduled_row_level_ttl_executor_failed: sum
184+
security_certificate_expiration_ca: max
185+
storage_l0_num_files: sum
186+
storage_l0_sublevels: sum
187+
sys_cgo_allocbytes: sum
188+
sys_cgo_totalbytes: sum
189+
sys_cgocalls: sum
190+
sys_cpu_combined_percent_normalized: mean
191+
sys_cpu_sys_ns: sum
192+
sys_cpu_sys_percent: mean
193+
sys_cpu_user_ns: sum
194+
sys_cpu_user_percent: mean
195+
sys_fd_open: sum
196+
sys_fd_softlimit: max
197+
sys_gc_count: sum
198+
sys_gc_pause_ns: sum
199+
sys_gc_pause_percent: mean
200+
sys_go_allocbytes: sum
201+
sys_go_totalbytes: sum
202+
sys_goroutines: sum
203+
sys_host_disk_iopsinprogress: sum
204+
sys_host_disk_read_bytes: sum
205+
sys_host_disk_read_count: sum
206+
sys_host_disk_write_bytes: sum
207+
sys_host_disk_write_count: sum
208+
sys_host_net_recv_bytes: sum
209+
sys_host_net_send_bytes: sum
210+
sys_rss: sum
211+
sys_runnable_goroutines_per_cpu: mean
212+
sys_totalmem: max
213+
sysbytes: sum
214+
syscount: sum
215+
timeseries_write_bytes: sum
216+
timeseries_write_errors: sum
217+
timeseries_write_samples: sum
218+
totalbytes: sum
219+
valbytes: sum
220+
valcount: sum
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# This shared list serves as a base list for all clusters that use metrics
2+
# export. It was originally taken from [1], and has been modified for CC's use
3+
# case over time.
4+
#
5+
# In this shared list, we have removed all system and storage related metrics,
6+
# with the exception of `sys_uptime`. For CRDB or tenant specific metrics, see
7+
# crdb_metrics.yaml and tenant_metrics.yaml.
8+
#
9+
# NOTE: When adding a new metric to this list, please ensure that it applies to
10+
# both CRDB and tenant clusters, or else they should go into the cluster
11+
# specific list.
12+
#
13+
# Each metric is mapped to its appropriate aggregation function (sum, max, or mean),
14+
# which is used to aggregate metrics by specific labels.
15+
#
16+
# [1]: https://www.cockroachlabs.com/docs/stable/essential-metrics-dedicated
17+
metrics:
18+
changefeed_backfill_count: sum
19+
changefeed_backfill_pending_ranges: sum
20+
changefeed_commit_latency: sum
21+
changefeed_emitted_messages: sum
22+
changefeed_error_retries: sum
23+
changefeed_failures: sum
24+
changefeed_max_behind_nanos: max
25+
changefeed_message_size_hist: sum
26+
changefeed_running: sum
27+
clock_offset_meannanos: mean
28+
clock_offset_stddevnanos: mean
29+
distsender_batches: sum
30+
distsender_batches_partial: sum
31+
distsender_errors_notleaseholder: sum
32+
distsender_rpc_sent: sum
33+
distsender_rpc_sent_local: sum
34+
distsender_rpc_sent_nextreplicaerror: sum
35+
jobs_changefeed_resume_retry_error: sum
36+
requests_slow_distsender: sum
37+
round_trip_latency: sum
38+
schedules_BACKUP_failed: sum
39+
schedules_BACKUP_last_completed_time: max
40+
schedules_BACKUP_started: sum
41+
schedules_BACKUP_succeeded: sum
42+
sql_bytesin: sum
43+
sql_bytesout: sum
44+
sql_conn_latency: sum
45+
sql_conns: sum
46+
sql_ddl_count: sum
47+
sql_delete_count: sum
48+
sql_distsql_contended_queries_count: sum
49+
sql_distsql_exec_latency: sum
50+
sql_distsql_flows_active: sum
51+
sql_distsql_flows_total: sum
52+
sql_distsql_queries_active: sum
53+
sql_distsql_queries_total: sum
54+
sql_distsql_select_count: sum
55+
sql_distsql_service_latency: sum
56+
sql_exec_latency: sum
57+
sql_failure_count: sum
58+
sql_full_scan_count: sum
59+
sql_insert_count: sum
60+
sql_mem_distsql_current: sum
61+
sql_mem_distsql_max: max
62+
sql_mem_internal_session_current: sum
63+
sql_mem_internal_session_max: max
64+
sql_mem_internal_txn_current: sum
65+
sql_mem_internal_txn_max: max
66+
sql_mem_root_current: sum
67+
sql_misc_count: sum
68+
sql_new_conns: sum
69+
sql_query_count: sum
70+
sql_select_count: sum
71+
sql_service_latency: sum
72+
sql_statements_active: sum
73+
sql_txn_abort_count: sum
74+
sql_txn_begin_count: sum
75+
sql_txn_commit_count: sum
76+
sql_txn_latency: sum
77+
sql_txn_rollback_count: sum
78+
sql_txns_open: sum
79+
sql_update_count: sum
80+
sys_uptime: max
81+
txn_aborts: sum
82+
txn_commits: sum
83+
txn_commits1PC: sum
84+
txn_durations: sum
85+
txn_restarts: sum
86+
txn_restarts_serializable: sum
87+
txn_restarts_writetooold: sum
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
# CrdbTenant-specific metrics. The metrics in this list will be combined with
2+
# the ones in shared_metrics.yaml.
3+
#
4+
# Each metric is mapped to an aggregation function that indicates how the metric
5+
# should be aggregated when needed.
6+
#
7+
# Note: The aggregation functions here are declared as noop because they are
8+
# only applied for UA-enabled clusters. For regular tenant clusters, these
9+
# values are not used but are included for consistency with the overall data
10+
# structure.
11+
metrics:
12+
tenant_sql_usage_request_units: noop
13+
tenant_sql_usage_kv_request_units: noop
14+
tenant_sql_usage_read_batches: noop
15+
tenant_sql_usage_read_requests: noop
16+
tenant_sql_usage_read_bytes: noop
17+
tenant_sql_usage_write_batches: noop
18+
tenant_sql_usage_write_requests: noop
19+
tenant_sql_usage_write_bytes: noop
20+
tenant_sql_usage_sql_pods_cpu_seconds: noop
21+
tenant_sql_usage_pgwire_egress_bytes: noop
22+
tenant_sql_usage_external_io_ingress_bytes: noop
23+
tenant_sql_usage_external_io_egress_bytes: noop
24+
tenant_sql_usage_cross_region_network_ru: noop
25+
tenant_sql_usage_estimated_cpu_seconds: noop
26+
tenant_sql_usage_provisioned_vcpus: noop
27+
sql_aggregated_livebytes: noop

0 commit comments

Comments
 (0)