From 01b46ed99058bb60fabbd447fc7818a8a6f7e6d7 Mon Sep 17 00:00:00 2001 From: Milan Pavlik Date: Sun, 11 Sep 2022 21:23:23 +0000 Subject: [PATCH] [dashboards] Add gRPC / Server dashboard --- .../mixins/cross-teams/dashboards.libsonnet | 1 + .../dashboards/gitpod-grpc-server.json | 777 ++++++++++++++++++ 2 files changed, 778 insertions(+) create mode 100644 operations/observability/mixins/cross-teams/dashboards/gitpod-grpc-server.json diff --git a/operations/observability/mixins/cross-teams/dashboards.libsonnet b/operations/observability/mixins/cross-teams/dashboards.libsonnet index 41745cdce65a56..b26a369ca899b1 100644 --- a/operations/observability/mixins/cross-teams/dashboards.libsonnet +++ b/operations/observability/mixins/cross-teams/dashboards.libsonnet @@ -15,5 +15,6 @@ // 'my-new-dashboard.json': (import 'dashboards/components/new-component.json'), 'gitpod-cluster-autoscaler-k3s.json': (import 'dashboards/gitpod-cluster-autoscaler-k3s.json'), 'gitpod-node-resource-metrics.json': (import 'dashboards/gitpod-node-resource-metrics.json'), + 'gitpod-grpc-server.json': (import 'dashboards/gitpod-grpc-server.json'), }, } diff --git a/operations/observability/mixins/cross-teams/dashboards/gitpod-grpc-server.json b/operations/observability/mixins/cross-teams/dashboards/gitpod-grpc-server.json new file mode 100644 index 00000000000000..7e4649978553be --- /dev/null +++ b/operations/observability/mixins/cross-teams/dashboards/gitpod-grpc-server.json @@ -0,0 +1,777 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": "-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, + "type": "dashboard" + } + ] + }, + "description": "Common visualisations of gRPC (Golang) Server metrics", + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": 72, + "links": [], + "liveNow": false, + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 2, + "panels": [], + "title": "Requests", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "description": "Rate of gRPC requests started", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 1 + }, + "id": 4, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "sum(increase(grpc_server_started_total{grpc_service=~\"$service\", grpc_type=~\"$type\"}[1m])) by (grpc_service, grpc_method)", + "legendFormat": "{{grpc_service}}.{{grpc_method}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests started [1m]", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "description": "Rate of requests completed, by status", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "sum(increase(grpc_server_handled_total{grpc_service=~\"$service\", grpc_method=~\"$method\", grpc_type=~\"$type\"}[1m])) by (grpc_service, grpc_method, grpc_code)", + "legendFormat": "{{grpc_service}}.{{grpc_method}} - {{grpc_code}}", + "range": true, + "refId": "A" + } + ], + "title": "Requests completed [1m]", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 17 + }, + "id": 11, + "panels": [], + "title": "Errors", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "description": "Rate of errors, by status", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "opm" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 18 + }, + "id": 7, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "sum(increase(grpc_server_handled_total{grpc_service=~\"$service\", grpc_method=~\"$method\", grpc_code!=\"OK\", grpc_type=~\"$type\"}[1m])) by (grpc_service, grpc_method, grpc_code)", + "legendFormat": "{{grpc_service}}.{{grpc_method}} - {{grpc_code}}", + "range": true, + "refId": "A" + } + ], + "title": "Request errors [1m]", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 26 + }, + "id": 13, + "panels": [], + "title": "Latency", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "description": "", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 27 + }, + "id": 9, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, \n sum(rate(grpc_server_handling_seconds_bucket{grpc_type=\"unary\", grpc_service=~\"$service\", grpc_method=~\"$method\", grpc_type=\"unary\"}[5m])) by (grpc_service, grpc_method, le)\n)", + "legendFormat": "{{grpc_service}}.{{grpc_method}}", + "range": true, + "refId": "A" + } + ], + "title": "99th percentile response latency [5m]", + "type": "timeseries" + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 35 + }, + "id": 15, + "panels": [], + "title": "Messaging", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "description": "Rate of messages received by server", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 36 + }, + "id": 17, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "sum(increase(grpc_server_msg_received_total{grpc_service=~\"$service\", grpc_method=~\"$method\", grpc_type=~\"$type\"}[1m])) by (grpc_service, grpc_method)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Server messages received [1m]", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "description": "Rate of messages sent by server", + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 36 + }, + "id": 18, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "editorMode": "code", + "expr": "sum(increase(grpc_server_msg_sent_total{grpc_service=~\"$service\", grpc_method=~\"$method\", grpc_type=~\"$type\"}[1m])) by (grpc_service, grpc_method)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Server messages sent [1m]", + "type": "timeseries" + } + ], + "schemaVersion": 37, + "style": "dark", + "tags": [ + "grpc", + "server" + ], + "templating": { + "list": [ + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "definition": "label_values(grpc_server_started_total, grpc_service)", + "description": "gRPC Service Name", + "hide": 0, + "includeAll": true, + "label": "Service", + "multi": true, + "name": "service", + "options": [], + "query": { + "query": "label_values(grpc_server_started_total, grpc_service)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "definition": "label_values(grpc_server_started_total, grpc_method)", + "description": "gRPC Method Name", + "hide": 0, + "includeAll": true, + "label": "Method", + "multi": true, + "name": "method", + "options": [], + "query": { + "query": "label_values(grpc_server_started_total, grpc_method)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "type": "query" + }, + { + "allValue": ".*", + "current": { + "selected": true, + "text": [ + "All" + ], + "value": [ + "$__all" + ] + }, + "datasource": { + "type": "prometheus", + "uid": "P4169E866C3094E38" + }, + "definition": "label_values(grpc_server_started_total, grpc_type)", + "description": "gRPC request type - bidirectional stream, server stream, unary, client_stream", + "hide": 0, + "includeAll": true, + "label": "Type", + "multi": true, + "name": "type", + "options": [], + "query": { + "query": "label_values(grpc_server_started_total, grpc_type)", + "refId": "StandardVariableQuery" + }, + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "sort": 5, + "type": "query" + } + ] + }, + "time": { + "from": "now-6h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ] + }, + "timezone": "utc", + "title": "gRPC / Server", + "uid": "fDH4_QMVk", + "version": 5, + "weekStart": "monday" +}