Skip to content

[ws-manager] Add workspace class to metrics #10376

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 31, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion components/ws-manager/pkg/manager/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ func (m *Manager) StartWorkspace(ctx context.Context, req *api.StartWorkspaceReq
OwnerToken: startContext.OwnerToken,
}

m.metrics.OnWorkspaceStarted(req.Type)
m.metrics.OnWorkspaceStarted(req.Type, req.Spec.Class)

return okResponse, nil
}
Expand Down
27 changes: 14 additions & 13 deletions components/ws-manager/pkg/manager/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,21 +60,21 @@ func newMetrics(m *Manager) *metrics {
Help: "time it took for workspace pods to reach the running phase",
// same as components/ws-manager-bridge/src/prometheus-metrics-exporter.ts#L15
Buckets: prometheus.ExponentialBuckets(2, 2, 10),
}, []string{"type"}),
}, []string{"type", "class"}),
initializeTimeHistVec: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsWorkspaceSubsystem,
Name: "workspace_initialize_seconds",
Help: "time it took to initialize workspace",
Buckets: prometheus.ExponentialBuckets(2, 2, 10),
}, []string{"type"}),
}, []string{"type", "class"}),
finalizeTimeHistVec: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsWorkspaceSubsystem,
Name: "workspace_finalize_seconds",
Help: "time it took to finalize workspace",
Buckets: prometheus.ExponentialBuckets(2, 2, 10),
}, []string{"type"}),
}, []string{"type", "class"}),
volumeSnapshotTimeHistVec: prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: metricsNamespace,
Subsystem: metricsWorkspaceSubsystem,
Expand All @@ -87,13 +87,13 @@ func newMetrics(m *Manager) *metrics {
Subsystem: metricsWorkspaceSubsystem,
Name: "workspace_starts_total",
Help: "total number of workspaces started",
}, []string{"type"}),
}, []string{"type", "class"}),
totalStopsCounterVec: prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: metricsNamespace,
Subsystem: metricsWorkspaceSubsystem,
Name: "workspace_stops_total",
Help: "total number of workspaces stopped",
}, []string{"reason", "type"}),
}, []string{"reason", "type", "class"}),
totalOpenPortGauge: prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Namespace: metricsNamespace,
Subsystem: metricsWorkspaceSubsystem,
Expand Down Expand Up @@ -163,9 +163,9 @@ func (m *metrics) Register(reg prometheus.Registerer) error {
return nil
}

func (m *metrics) OnWorkspaceStarted(tpe api.WorkspaceType) {
func (m *metrics) OnWorkspaceStarted(tpe api.WorkspaceType, class string) {
nme := api.WorkspaceType_name[int32(tpe)]
counter, err := m.totalStartsCounterVec.GetMetricWithLabelValues(nme)
counter, err := m.totalStartsCounterVec.GetMetricWithLabelValues(nme, class)
if err != nil {
log.WithError(err).WithField("type", tpe).Warn("cannot get counter for workspace start metric")
return
Expand Down Expand Up @@ -198,7 +198,7 @@ func (m *metrics) OnChange(status *api.WorkspaceStatus) {
}

t := status.Metadata.StartedAt.AsTime()
hist, err := m.startupTimeHistVec.GetMetricWithLabelValues(tpe)
hist, err := m.startupTimeHistVec.GetMetricWithLabelValues(tpe, status.Spec.Class)
if err != nil {
log.WithError(err).WithField("type", tpe).Warn("cannot get startup time histogram metric")
return
Expand All @@ -217,7 +217,7 @@ func (m *metrics) OnChange(status *api.WorkspaceStatus) {
reason = "regular-stop"
}

counter, err := m.totalStopsCounterVec.GetMetricWithLabelValues(reason, tpe)
counter, err := m.totalStopsCounterVec.GetMetricWithLabelValues(reason, tpe, status.Spec.Class)
if err != nil {
log.WithError(err).WithField("reason", reason).Warn("cannot get counter for workspace stops metric")
return
Expand All @@ -240,7 +240,7 @@ func newPhaseTotalVec(m *Manager) *phaseTotalVec {
name := prometheus.BuildFQName(metricsNamespace, metricsWorkspaceSubsystem, "workspace_phase_total")
return &phaseTotalVec{
name: name,
desc: prometheus.NewDesc(name, "Current number of workspaces per phase", []string{"phase", "type"}, prometheus.Labels(map[string]string{})),
desc: prometheus.NewDesc(name, "Current number of workspaces per phase", []string{"phase", "type", "class"}, prometheus.Labels(map[string]string{})),
manager: m,
}
}
Expand Down Expand Up @@ -276,16 +276,17 @@ func (m *phaseTotalVec) Collect(ch chan<- prometheus.Metric) {
}
status := api.WorkspacePhase_name[int32(rawStatus.Phase)]
tpe := api.WorkspaceType_name[int32(rawStatus.Spec.Type)]
class := rawStatus.Spec.Class

counts[tpe+"::"+status]++
counts[tpe+"::"+status+"::"+class]++
}

for key, cnt := range counts {
segs := strings.Split(key, "::")
tpe, phase := segs[0], segs[1]
tpe, phase, class := segs[0], segs[1], segs[2]

// metrics cannot be re-used, we have to create them every single time
metric, err := prometheus.NewConstMetric(m.desc, prometheus.GaugeValue, float64(cnt), phase, tpe)
metric, err := prometheus.NewConstMetric(m.desc, prometheus.GaugeValue, float64(cnt), phase, tpe, class)
if err != nil {
log.WithError(err).Warnf("cannot create workspace metric - %s will be inaccurate", m.name)
continue
Expand Down
7 changes: 4 additions & 3 deletions components/ws-manager/pkg/manager/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -791,8 +791,9 @@ func (m *Monitor) initializeWorkspaceContent(ctx context.Context, pod *corev1.Po
} else {
err = handleGRPCError(ctx, err)
}
wsType := pod.Labels[wsk8s.TypeLabel]
hist, errHist := m.manager.metrics.initializeTimeHistVec.GetMetricWithLabelValues(wsType)
wsType := strings.ToUpper(pod.Labels[wsk8s.TypeLabel])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why convert to upper case?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

to make it consistent with the other metrics: #10376 (comment)

It's easier to make that one uppercase, considering that the rest are, too.

Copy link
Contributor

@jenting jenting May 31, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see. I was confused by the screenshot in the PR description, which probably wasn't updated.

wsClass := pod.Labels[workspaceClassLabel]
hist, errHist := m.manager.metrics.initializeTimeHistVec.GetMetricWithLabelValues(wsType, wsClass)
if errHist != nil {
log.WithError(errHist).WithField("type", wsType).Warn("cannot get initialize time histogram metric")
} else {
Expand Down Expand Up @@ -1166,7 +1167,7 @@ func (m *Monitor) finalizeWorkspaceContent(ctx context.Context, wso *workspaceOb
break
}

hist, err := m.manager.metrics.finalizeTimeHistVec.GetMetricWithLabelValues(wsType)
hist, err := m.manager.metrics.finalizeTimeHistVec.GetMetricWithLabelValues(wsType, wso.Pod.Labels[workspaceClassLabel])
if err != nil {
log.WithError(err).WithField("type", wsType).Warn("cannot get finalize time histogram metric")
} else {
Expand Down