|
+// Copyright (c) 2022 Gitpod GmbH. All rights reserved.
+// Licensed under the GNU Affero General Public License (AGPL).
+// See License-AGPL.txt in the project root for license information.
+
+package cgroup
+
+import (
+	"context"
+	"os"
+	"path/filepath"
+	"time"
+
+	cgroups "github.com/gitpod-io/gitpod/common-go/cgroups/v2"
+	"github.com/prometheus/client_golang/prometheus"
+)
+
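+// PSIMetrics exposes the cgroup v2 pressure stall information (PSI) of a
+// workspace as Prometheus gauges for cpu, memory and io.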
+type PSIMetrics struct {
+	cpu      *prometheus.GaugeVec
+	memory   *prometheus.GaugeVec
+	io       *prometheus.GaugeVec
+	nodeName string
+}
+
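+// NewPSIMetrics creates the PSI gauges and registers them with the given registerer.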
+func NewPSIMetrics(prom prometheus.Registerer) *PSIMetrics {
+	p := &PSIMetrics{
+		cpu: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "workspace_cpu_psi_total_seconds",
+			Help: "Total time spent under cpu pressure in seconds",
+		}, []string{"node", "workspace", "kind"}),
+
+		memory: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "workspace_memory_psi_total_seconds",
+			Help: "Total time spent under memory pressure in seconds",
+		}, []string{"node", "workspace", "kind"}),
+
+		io: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+			Name: "workspace_io_psi_total_seconds",
+			Help: "Total time spent under io pressure in seconds",
+		}, []string{"node", "workspace", "kind"}),
+
+		nodeName: os.Getenv("NODENAME"),
+	}
+
+	prom.MustRegister(
+		p.cpu,
+		p.memory,
+		p.io,
+	)
+
+	return p
+}
+
+func (p *PSIMetrics) Name() string { return "psi-metrics" }
+func (p *PSIMetrics) Type() Version { return Version2 }
+
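+// Apply starts a goroutine that samples the workspace's PSI every ten seconds
+// until the plugin context is cancelled.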
+func (p *PSIMetrics) Apply(ctx context.Context, opts *PluginOptions) error {
+	fullPath := filepath.Join(opts.BasePath, opts.CgroupPath)
+	if _, err := os.Stat(fullPath); err != nil {
+		return err
+	}
+
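+	// The controllers read cpu.pressure, memory.pressure and io.pressure
+	// below the workspace's cgroup.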
+	cpu := cgroups.NewCpuController(fullPath)
+	memory := cgroups.NewMemoryController(fullPath)
+	io := cgroups.NewIOController(fullPath)
+
+	go func() {
+		ticker := time.NewTicker(10 * time.Second)
+		defer ticker.Stop()
+
+		for {
+			select {
+			case <-ticker.C:
+				p.scrape(cpu, memory, io, opts.InstanceId)
+			case <-ctx.Done():
+				return
+			}
+		}
+	}()
+
+	return nil
+}
+
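+// scrape reads the cpu, memory and io pressure totals and exports the "some"
+// and "full" stall times. The kernel reports these totals in microseconds, so
+// they are converted to seconds to match the metric names. Read errors are
+// skipped; the next tick retries.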
+func (p *PSIMetrics) scrape(cpu *cgroups.Cpu, memory *cgroups.Memory, io *cgroups.IO, instanceID string) {
+	if psi, err := cpu.PSI(); err == nil {
+		p.cpu.WithLabelValues(p.nodeName, instanceID, "some").Set(float64(psi.Some) / 1e6)
+		p.cpu.WithLabelValues(p.nodeName, instanceID, "full").Set(float64(psi.Full) / 1e6)
+	}
+
+	if psi, err := memory.PSI(); err == nil {
+		p.memory.WithLabelValues(p.nodeName, instanceID, "some").Set(float64(psi.Some) / 1e6)
+		p.memory.WithLabelValues(p.nodeName, instanceID, "full").Set(float64(psi.Full) / 1e6)
+	}
+
+	if psi, err := io.PSI(); err == nil {
+		p.io.WithLabelValues(p.nodeName, instanceID, "some").Set(float64(psi.Some) / 1e6)
+		p.io.WithLabelValues(p.nodeName, instanceID, "full").Set(float64(psi.Full) / 1e6)
+	}
+}