Skip to content

Add referenced memory metric #2495

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Apr 22, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cmd/cadvisor.go
Original file line number Diff line number Diff line change
@@ -87,6 +87,7 @@ var (
container.ProcessSchedulerMetrics: struct{}{},
container.ProcessMetrics: struct{}{},
container.HugetlbUsageMetrics: struct{}{},
container.ReferencedMemoryMetrics: struct{}{},
}}

// List of metrics that can be ignored.
@@ -101,6 +102,7 @@ var (
container.ProcessSchedulerMetrics: struct{}{},
container.ProcessMetrics: struct{}{},
container.HugetlbUsageMetrics: struct{}{},
container.ReferencedMemoryMetrics: struct{}{},
}
)

@@ -132,7 +134,7 @@ func (ml *metricSetValue) Set(value string) error {
}

func init() {
flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'disk', 'diskIO', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb'.")
flag.Var(&ignoreMetrics, "disable_metrics", "comma-separated list of `metrics` to be disabled. Options are 'disk', 'diskIO', 'network', 'tcp', 'udp', 'percpu', 'sched', 'process', 'hugetlb', 'referenced_memory'.")

// Default logging verbosity to V(2)
flag.Set("v", "2")
7 changes: 7 additions & 0 deletions cmd/cadvisor_test.go
Original file line number Diff line number Diff line change
@@ -40,6 +40,12 @@ func TestUdpMetricsAreDisabledByDefault(t *testing.T) {
assert.True(t, ignoreMetrics.Has(container.NetworkUdpUsageMetrics))
}

func TestReferencedMemoryMetricsIsDisabledByDefault(t *testing.T) {
assert.True(t, ignoreMetrics.Has(container.ReferencedMemoryMetrics))
flag.Parse()
assert.True(t, ignoreMetrics.Has(container.ReferencedMemoryMetrics))
}

func TestIgnoreMetrics(t *testing.T) {
tests := []struct {
value string
@@ -86,6 +92,7 @@ func TestToIncludedMetrics(t *testing.T) {
container.AppMetrics: struct{}{},
container.HugetlbUsageMetrics: struct{}{},
container.PerfMetrics: struct{}{},
container.ReferencedMemoryMetrics: struct{}{},
},
container.AllMetrics,
{},
2 changes: 2 additions & 0 deletions container/factory.go
Original file line number Diff line number Diff line change
@@ -59,6 +59,7 @@ const (
ProcessMetrics MetricKind = "process"
HugetlbUsageMetrics MetricKind = "hugetlb"
PerfMetrics MetricKind = "perf_event"
ReferencedMemoryMetrics MetricKind = "referenced_memory"
)

// AllMetrics represents all kinds of metrics that cAdvisor supported.
@@ -79,6 +80,7 @@ var AllMetrics = MetricSet{
AppMetrics: struct{}{},
HugetlbUsageMetrics: struct{}{},
PerfMetrics: struct{}{},
ReferencedMemoryMetrics: struct{}{},
}

func (mk MetricKind) String() string {
115 changes: 113 additions & 2 deletions container/libcontainer/handler.go
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@ package libcontainer
import (
"bufio"
"encoding/json"
"flag"
"fmt"
"io"
"io/ioutil"
@@ -38,16 +39,27 @@ import (
"k8s.io/klog/v2"
)

var (
whitelistedUlimits = [...]string{"max_open_files"}
referencedResetInterval = flag.Uint64("referenced_reset_interval", 0,
"Reset interval for referenced bytes (container_referenced_bytes metric), number of measurement cycles after which referenced bytes are cleared, if set to 0 referenced bytes are never cleared (default: 0)")

smapsFilePathPattern = "/proc/%d/smaps"
clearRefsFilePathPattern = "/proc/%d/clear_refs"

referencedRegexp = regexp.MustCompile(`Referenced:\s*([0-9]+)\s*kB`)
isDigitRegExp = regexp.MustCompile("\\d+")
)

type Handler struct {
cgroupManager cgroups.Manager
rootFs string
pid int
includedMetrics container.MetricSet
pidMetricsCache map[int]*info.CpuSchedstat
cycles uint64
}

var whitelistedUlimits = [...]string{"max_open_files"}

func NewHandler(cgroupManager cgroups.Manager, rootFs string, pid int, includedMetrics container.MetricSet) *Handler {
return &Handler{
cgroupManager: cgroupManager,
@@ -81,6 +93,19 @@ func (h *Handler) GetStats() (*info.ContainerStats, error) {
}
}

if h.includedMetrics.Has(container.ReferencedMemoryMetrics) {
h.cycles++
pids, err := h.cgroupManager.GetPids()
if err != nil {
klog.V(4).Infof("Could not get PIDs for container %d: %v", h.pid, err)
} else {
stats.ReferencedMemory, err = referencedBytesStat(pids, h.cycles, *referencedResetInterval)
if err != nil {
klog.V(4).Infof("Unable to get referenced bytes: %v", err)
}
}
}

// If we know the pid then get network stats from /proc/<pid>/net/dev
if h.pid == 0 {
return stats, nil
@@ -318,6 +343,92 @@ func schedulerStatsFromProcs(rootFs string, pids []int, pidMetricsCache map[int]
return schedstats, nil
}

// referencedBytesStat gets and clears referenced bytes
// see: https://github.com/brendangregg/wss#wsspl-referenced-page-flag
func referencedBytesStat(pids []int, cycles uint64, resetInterval uint64) (uint64, error) {
referencedKBytes, err := getReferencedKBytes(pids)
if err != nil {
return uint64(0), err
}

err = clearReferencedBytes(pids, cycles, resetInterval)
if err != nil {
return uint64(0), err
}
return referencedKBytes * 1024, nil
}

func getReferencedKBytes(pids []int) (uint64, error) {
referencedKBytes := uint64(0)
readSmapsContent := false
foundMatch := false
for _, pid := range pids {
smapsFilePath := fmt.Sprintf(smapsFilePathPattern, pid)
smapsContent, err := ioutil.ReadFile(smapsFilePath)
if err != nil {
klog.V(5).Infof("Cannot read %s file, err: %s", smapsFilePath, err)
if os.IsNotExist(err) {
continue //smaps file does not exists for all PIDs
}
return 0, err
}
readSmapsContent = true

allMatches := referencedRegexp.FindAllSubmatch(smapsContent, -1)
if len(allMatches) == 0 {
klog.V(5).Infof("Not found any information about referenced bytes in %s file", smapsFilePath)
continue // referenced bytes may not exist in smaps file
}

for _, matches := range allMatches {
if len(matches) != 2 {
return 0, fmt.Errorf("failed to match regexp in output: %s", string(smapsContent))
}
foundMatch = true
referenced, err := strconv.ParseUint(string(matches[1]), 10, 64)
if err != nil {
return 0, err
}
referencedKBytes += referenced
}
}

if len(pids) != 0 {
if !readSmapsContent {
klog.Warningf("Cannot read smaps files for any PID from %s", "CONTAINER")
} else if !foundMatch {
klog.Warningf("Not found any information about referenced bytes in smaps files for any PID from %s", "CONTAINER")
}
}
return referencedKBytes, nil
}

func clearReferencedBytes(pids []int, cycles uint64, resetInterval uint64) error {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to make sure I understand, if clearReferencedBytes errors, we will wait another resetInterval cycles before attempting to reset again, right? If so, is that better than always trying to clear after an error?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're right, if clearReferencedBytes return error, we will wait another resetInterval but error occurs only if there is a serious issue in system (problem with writing into existing file or problem with closing previously opened file). I don't think that is a good idea to force clearing in case of errors.
User can easily observe if referenced bytes were cleared or not, seeing value of referenced bytes.

Since your last review I've introduced an option to switch off clearing of referenced bytes (setting referenced_reset_interval to 0). It's documented here. During experiments it's more user friendly to set 0 than very long reset interval if it's desired to observe referenced bytes in longer period.

if resetInterval == 0 {
return nil
}

if cycles%resetInterval == 0 {
for _, pid := range pids {
clearRefsFilePath := fmt.Sprintf(clearRefsFilePathPattern, pid)
clerRefsFile, err := os.OpenFile(clearRefsFilePath, os.O_WRONLY, 0644)
if err != nil {
// clear_refs file may not exist for all PIDs
continue
}
_, err = clerRefsFile.WriteString("1\n")
if err != nil {
return err
}
err = clerRefsFile.Close()
if err != nil {
return err
}
}
}
return nil
}

func networkStatsFromProc(rootFs string, pid int) ([]info.InterfaceStats, error) {
netStatsFile := path.Join(rootFs, "proc", strconv.Itoa(pid), "/net/dev")

85 changes: 85 additions & 0 deletions container/libcontainer/handler_test.go
Original file line number Diff line number Diff line change
@@ -22,6 +22,7 @@ import (
info "github.com/google/cadvisor/info/v1"
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/stretchr/testify/assert"
)

func TestScanInterfaceStats(t *testing.T) {
@@ -215,3 +216,87 @@ func TestParseLimitsFile(t *testing.T) {
}
}
}

func TestReferencedBytesStat(t *testing.T) {
//overwrite package variables
smapsFilePathPattern = "testdata/smaps%d"
clearRefsFilePathPattern = "testdata/clear_refs%d"

pids := []int{4, 6, 8}
stat, err := referencedBytesStat(pids, 1, 3)
assert.Nil(t, err)
assert.Equal(t, uint64(416*1024), stat)

clearRefsFiles := []string{
"testdata/clear_refs4",
"testdata/clear_refs6",
"testdata/clear_refs8"}

//check if clear_refs files have proper values
assert.Equal(t, "0\n", getFileContent(t, clearRefsFiles[0]))
assert.Equal(t, "0\n", getFileContent(t, clearRefsFiles[1]))
assert.Equal(t, "0\n", getFileContent(t, clearRefsFiles[2]))
}

func TestReferencedBytesStatWhenNeverCleared(t *testing.T) {
//overwrite package variables
smapsFilePathPattern = "testdata/smaps%d"
clearRefsFilePathPattern = "testdata/clear_refs%d"

pids := []int{4, 6, 8}
stat, err := referencedBytesStat(pids, 1, 0)
assert.Nil(t, err)
assert.Equal(t, uint64(416*1024), stat)

clearRefsFiles := []string{
"testdata/clear_refs4",
"testdata/clear_refs6",
"testdata/clear_refs8"}

//check if clear_refs files have proper values
assert.Equal(t, "0\n", getFileContent(t, clearRefsFiles[0]))
assert.Equal(t, "0\n", getFileContent(t, clearRefsFiles[1]))
assert.Equal(t, "0\n", getFileContent(t, clearRefsFiles[2]))
}

func TestReferencedBytesStatWhenResetIsNeeded(t *testing.T) {
//overwrite package variables
smapsFilePathPattern = "testdata/smaps%d"
clearRefsFilePathPattern = "testdata/clear_refs%d"

pids := []int{4, 6, 8}
stat, err := referencedBytesStat(pids, 1, 1)
assert.Nil(t, err)
assert.Equal(t, uint64(416*1024), stat)

clearRefsFiles := []string{
"testdata/clear_refs4",
"testdata/clear_refs6",
"testdata/clear_refs8"}

//check if clear_refs files have proper values
assert.Equal(t, "1\n", getFileContent(t, clearRefsFiles[0]))
assert.Equal(t, "1\n", getFileContent(t, clearRefsFiles[1]))
assert.Equal(t, "1\n", getFileContent(t, clearRefsFiles[2]))

clearTestData(t, clearRefsFiles)
}

func TestGetReferencedKBytesWhenSmapsMissing(t *testing.T) {
//overwrite package variable
smapsFilePathPattern = "testdata/smaps%d"

pids := []int{10}
referenced, err := getReferencedKBytes(pids)
assert.Nil(t, err)
assert.Equal(t, uint64(0), referenced)
}

func TestClearReferencedBytesWhenClearRefsMissing(t *testing.T) {
//overwrite package variable
clearRefsFilePathPattern = "testdata/clear_refs%d"

pids := []int{10}
err := clearReferencedBytes(pids, 0, 1)
assert.Nil(t, err)
}
15 changes: 15 additions & 0 deletions container/libcontainer/helpers_test.go
Original file line number Diff line number Diff line change
@@ -16,13 +16,15 @@ package libcontainer

import (
"fmt"
"io/ioutil"
"path/filepath"
"reflect"
"sort"
"strings"
"testing"

"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/stretchr/testify/assert"
)

var defaultCgroupSubsystems = []string{
@@ -130,3 +132,16 @@ func assertCgroupSubsystemsEqual(t *testing.T, expected, actual CgroupSubsystems
t.Fatalf("%s Expected %v == %v", message, expected.Mounts, actual.Mounts)
}
}

func getFileContent(t *testing.T, filePath string) string {
fileContent, err := ioutil.ReadFile(filePath)
assert.Nil(t, err)
return string(fileContent)
}

func clearTestData(t *testing.T, clearRefsPaths []string) {
for _, clearRefsPath := range clearRefsPaths {
err := ioutil.WriteFile(clearRefsPath, []byte("0\n"), 0644)
assert.Nil(t, err)
}
}
1 change: 1 addition & 0 deletions container/libcontainer/testdata/clear_refs4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
1 change: 1 addition & 0 deletions container/libcontainer/testdata/clear_refs6
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
1 change: 1 addition & 0 deletions container/libcontainer/testdata/clear_refs8
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0
Loading