Skip to content

Commit d979b73

Browse files
committed
[ws-daemon] Plugin for PSI metrics
1 parent e1d5013 commit d979b73

File tree

4 files changed

+101
-4
lines changed

4 files changed

+101
-4
lines changed

components/common-go/cgroups/cgroup.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@ import (
1414

1515
"github.com/containerd/cgroups"
1616
v2 "github.com/containerd/cgroups/v2"
17-
"github.com/gitpod-io/gitpod/common-go/log"
1817
)
1918

2019
const DefaultMountPoint = "/sys/fs/cgroup"
@@ -105,7 +104,6 @@ func ReadPSIValue(path string) (PSI, error) {
105104
return PSI{}, fmt.Errorf("could not find total stalled time")
106105
}
107106

108-
log.Infof("total is %v", line[i+6:])
109107
total, err := strconv.ParseUint(line[i+6:], 10, 64)
110108
if err != nil {
111109
return PSI{}, fmt.Errorf("could not parse total stalled time: %w", err)

components/ws-daemon/pkg/cgroup/cgroup.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ func (host *PluginHost) WorkspaceAdded(ctx context.Context, ws *dispatch.Workspa
8787
opts := &PluginOptions{
8888
BasePath: host.CGroupBasePath,
8989
CgroupPath: cgroupPath,
90-
InstanceID: ws.InstanceID,
90+
InstanceId: ws.InstanceID,
9191
}
9292

9393
for _, plg := range host.Plugins {
@@ -128,5 +128,5 @@ const (
128128
type PluginOptions struct {
129129
BasePath string
130130
CgroupPath string
131-
InstanceID string
131+
InstanceId string
132132
}
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
2+
// Licensed under the GNU Affero General Public License (AGPL).
3+
// See License-AGPL.txt in the project root for license information.
4+
5+
package cgroup
6+
7+
import (
8+
"context"
9+
"os"
10+
"path/filepath"
11+
"time"
12+
13+
cgroups "github.com/gitpod-io/gitpod/common-go/cgroups/v2"
14+
"github.com/prometheus/client_golang/prometheus"
15+
)
16+
17+
type PSIMetrics struct {
18+
cpu *prometheus.GaugeVec
19+
memory *prometheus.GaugeVec
20+
io *prometheus.GaugeVec
21+
nodeName string
22+
}
23+
24+
func NewPSIMetrics(prom prometheus.Registerer) *PSIMetrics {
25+
p := &PSIMetrics{
26+
cpu: prometheus.NewGaugeVec(prometheus.GaugeOpts{
27+
Name: "workspace_cpu_psi_total_seconds",
28+
Help: "Total time spent under cpu pressure in microseconds",
29+
}, []string{"node", "workspace", "kind"}),
30+
31+
memory: prometheus.NewGaugeVec(prometheus.GaugeOpts{
32+
Name: "workspace_memory_psi_total_seconds",
33+
Help: "Total time spent under memory pressure in microseconds",
34+
}, []string{"node", "workspace", "kind"}),
35+
36+
io: prometheus.NewGaugeVec(prometheus.GaugeOpts{
37+
Name: "workspace_io_psi_total_seconds",
38+
Help: "Total time spent under io pressure in microseconds",
39+
}, []string{"node", "workspace", "kind"}),
40+
41+
nodeName: os.Getenv("NODENAME"),
42+
}
43+
44+
prom.MustRegister(
45+
p.cpu,
46+
p.memory,
47+
p.io,
48+
)
49+
50+
return p
51+
}
52+
53+
// Name returns the identifier of this plugin.
func (p *PSIMetrics) Name() string {
	const pluginName = "psi-metrics"
	return pluginName
}
54+
// Type reports the cgroup version this plugin is written for, which is Version2.
func (p *PSIMetrics) Type() Version {
	return Version2
}
55+
56+
func (p *PSIMetrics) Apply(ctx context.Context, opts *PluginOptions) error {
57+
fullPath := filepath.Join(opts.BasePath, opts.CgroupPath)
58+
if _, err := os.Stat(fullPath); err != nil {
59+
return err
60+
}
61+
62+
cpu := cgroups.NewCpuController(fullPath)
63+
memory := cgroups.NewMemoryController(fullPath)
64+
io := cgroups.NewIOController(fullPath)
65+
66+
go func() {
67+
ticker := time.NewTicker(10 * time.Second)
68+
defer ticker.Stop()
69+
70+
for {
71+
select {
72+
case <-ticker.C:
73+
p.scrape(cpu, memory, io, opts.InstanceId)
74+
case <-ctx.Done():
75+
return
76+
}
77+
}
78+
}()
79+
80+
return nil
81+
}
82+
83+
func (p *PSIMetrics) scrape(cpu *cgroups.Cpu, memory *cgroups.Memory, io *cgroups.IO, instanceID string) {
84+
if psi, err := cpu.PSI(); err == nil {
85+
p.cpu.WithLabelValues(p.nodeName, instanceID, "some").Set(float64(psi.Some))
86+
p.cpu.WithLabelValues(p.nodeName, instanceID, "full").Set(float64(psi.Full))
87+
}
88+
89+
if psi, err := memory.PSI(); err == nil {
90+
p.memory.WithLabelValues(p.nodeName, instanceID, "some").Set(float64(psi.Some))
91+
p.memory.WithLabelValues(p.nodeName, instanceID, "full").Set(float64(psi.Full))
92+
}
93+
94+
if psi, err := io.PSI(); err == nil {
95+
p.io.WithLabelValues(p.nodeName, instanceID, "some").Set(float64(psi.Some))
96+
p.io.WithLabelValues(p.nodeName, instanceID, "full").Set(float64(psi.Full))
97+
}
98+
}

components/ws-daemon/pkg/daemon/daemon.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ func NewDaemon(config Config, reg prometheus.Registerer) (*Daemon, error) {
8787
},
8888
},
8989
procV2Plugin,
90+
cgroup.NewPSIMetrics(reg),
9091
)
9192
if err != nil {
9293
return nil, err

0 commit comments

Comments
 (0)