diff --git a/components/ws-manager/pkg/manager/manager.go b/components/ws-manager/pkg/manager/manager.go index d6b5ef70581896..5ff8cea835090c 100644 --- a/components/ws-manager/pkg/manager/manager.go +++ b/components/ws-manager/pkg/manager/manager.go @@ -341,7 +341,7 @@ func (m *Manager) StartWorkspace(ctx context.Context, req *api.StartWorkspaceReq OwnerToken: startContext.OwnerToken, } - m.metrics.OnWorkspaceStarted(req.Type) + m.metrics.OnWorkspaceStarted(req.Type, req.Spec.Class) return okResponse, nil } diff --git a/components/ws-manager/pkg/manager/metrics.go b/components/ws-manager/pkg/manager/metrics.go index 59c2718020e27c..3680fb8715ef99 100644 --- a/components/ws-manager/pkg/manager/metrics.go +++ b/components/ws-manager/pkg/manager/metrics.go @@ -60,21 +60,21 @@ func newMetrics(m *Manager) *metrics { Help: "time it took for workspace pods to reach the running phase", // same as components/ws-manager-bridge/src/prometheus-metrics-exporter.ts#L15 Buckets: prometheus.ExponentialBuckets(2, 2, 10), - }, []string{"type"}), + }, []string{"type", "class"}), initializeTimeHistVec: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: metricsNamespace, Subsystem: metricsWorkspaceSubsystem, Name: "workspace_initialize_seconds", Help: "time it took to initialize workspace", Buckets: prometheus.ExponentialBuckets(2, 2, 10), - }, []string{"type"}), + }, []string{"type", "class"}), finalizeTimeHistVec: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: metricsNamespace, Subsystem: metricsWorkspaceSubsystem, Name: "workspace_finalize_seconds", Help: "time it took to finalize workspace", Buckets: prometheus.ExponentialBuckets(2, 2, 10), - }, []string{"type"}), + }, []string{"type", "class"}), volumeSnapshotTimeHistVec: prometheus.NewHistogramVec(prometheus.HistogramOpts{ Namespace: metricsNamespace, Subsystem: metricsWorkspaceSubsystem, @@ -87,13 +87,13 @@ func newMetrics(m *Manager) *metrics { Subsystem: metricsWorkspaceSubsystem, Name: "workspace_starts_total", Help: "total number of workspaces started", - }, []string{"type"}), + }, []string{"type", "class"}), totalStopsCounterVec: prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsWorkspaceSubsystem, Name: "workspace_stops_total", Help: "total number of workspaces stopped", - }, []string{"reason", "type"}), + }, []string{"reason", "type", "class"}), totalOpenPortGauge: prometheus.NewGaugeFunc(prometheus.GaugeOpts{ Namespace: metricsNamespace, Subsystem: metricsWorkspaceSubsystem, @@ -163,9 +163,9 @@ func (m *metrics) Register(reg prometheus.Registerer) error { return nil } -func (m *metrics) OnWorkspaceStarted(tpe api.WorkspaceType) { +func (m *metrics) OnWorkspaceStarted(tpe api.WorkspaceType, class string) { nme := api.WorkspaceType_name[int32(tpe)] - counter, err := m.totalStartsCounterVec.GetMetricWithLabelValues(nme) + counter, err := m.totalStartsCounterVec.GetMetricWithLabelValues(nme, class) if err != nil { log.WithError(err).WithField("type", tpe).Warn("cannot get counter for workspace start metric") return @@ -198,7 +198,7 @@ func (m *metrics) OnChange(status *api.WorkspaceStatus) { } t := status.Metadata.StartedAt.AsTime() - hist, err := m.startupTimeHistVec.GetMetricWithLabelValues(tpe) + hist, err := m.startupTimeHistVec.GetMetricWithLabelValues(tpe, status.Spec.Class) if err != nil { log.WithError(err).WithField("type", tpe).Warn("cannot get startup time histogram metric") return @@ -217,7 +217,7 @@ func (m *metrics) OnChange(status *api.WorkspaceStatus) { reason = "regular-stop" } - counter, err := m.totalStopsCounterVec.GetMetricWithLabelValues(reason, tpe) + counter, err := m.totalStopsCounterVec.GetMetricWithLabelValues(reason, tpe, status.Spec.Class) if err != nil { log.WithError(err).WithField("reason", reason).Warn("cannot get counter for workspace stops metric") return @@ -240,7 +240,7 @@ func newPhaseTotalVec(m *Manager) *phaseTotalVec { name := prometheus.BuildFQName(metricsNamespace, metricsWorkspaceSubsystem, "workspace_phase_total") return &phaseTotalVec{ name: name, - desc: prometheus.NewDesc(name, "Current number of workspaces per phase", []string{"phase", "type"}, prometheus.Labels(map[string]string{})), + desc: prometheus.NewDesc(name, "Current number of workspaces per phase", []string{"phase", "type", "class"}, prometheus.Labels(map[string]string{})), manager: m, } } @@ -276,16 +276,17 @@ func (m *phaseTotalVec) Collect(ch chan<- prometheus.Metric) { } status := api.WorkspacePhase_name[int32(rawStatus.Phase)] tpe := api.WorkspaceType_name[int32(rawStatus.Spec.Type)] + class := rawStatus.Spec.Class - counts[tpe+"::"+status]++ + counts[tpe+"::"+status+"::"+class]++ } for key, cnt := range counts { segs := strings.Split(key, "::") - tpe, phase := segs[0], segs[1] + tpe, phase, class := segs[0], segs[1], segs[2] // metrics cannot be re-used, we have to create them every single time - metric, err := prometheus.NewConstMetric(m.desc, prometheus.GaugeValue, float64(cnt), phase, tpe) + metric, err := prometheus.NewConstMetric(m.desc, prometheus.GaugeValue, float64(cnt), phase, tpe, class) if err != nil { log.WithError(err).Warnf("cannot create workspace metric - %s will be inaccurate", m.name) continue diff --git a/components/ws-manager/pkg/manager/monitor.go b/components/ws-manager/pkg/manager/monitor.go index fc45082a4ee336..026c5ffa92de9d 100644 --- a/components/ws-manager/pkg/manager/monitor.go +++ b/components/ws-manager/pkg/manager/monitor.go @@ -791,8 +791,9 @@ func (m *Monitor) initializeWorkspaceContent(ctx context.Context, pod *corev1.Po } else { err = handleGRPCError(ctx, err) } - wsType := pod.Labels[wsk8s.TypeLabel] - hist, errHist := m.manager.metrics.initializeTimeHistVec.GetMetricWithLabelValues(wsType) + wsType := strings.ToUpper(pod.Labels[wsk8s.TypeLabel]) + wsClass := pod.Labels[workspaceClassLabel] + hist, errHist := m.manager.metrics.initializeTimeHistVec.GetMetricWithLabelValues(wsType, wsClass) if errHist != nil { log.WithError(errHist).WithField("type", wsType).Warn("cannot get initialize time histogram metric") } else { @@ -1166,7 +1167,7 @@ func (m *Monitor) finalizeWorkspaceContent(ctx context.Context, wso *workspaceOb break } - hist, err := m.manager.metrics.finalizeTimeHistVec.GetMetricWithLabelValues(wsType) + hist, err := m.manager.metrics.finalizeTimeHistVec.GetMetricWithLabelValues(wsType, wso.Pod.Labels[workspaceClassLabel]) if err != nil { log.WithError(err).WithField("type", wsType).Warn("cannot get finalize time histogram metric") } else {