Skip to content

Commit 39d7351

Browse files
committed
YARN-11687. Update CGroupsResourceCalculator to track usages using cgroupv2
- port CGroupsResourceCalculator to V2
1 parent daafc8a commit 39d7351

File tree

2 files changed

+210
-0
lines changed

2 files changed

+210
-0
lines changed
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
20+
21+
import org.apache.commons.io.FileUtils;
22+
import org.apache.commons.lang3.StringUtils;
23+
24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
26+
27+
import org.apache.hadoop.classification.VisibleForTesting;
28+
import org.apache.hadoop.util.CpuTimeTracker;
29+
import org.apache.hadoop.util.SysInfoLinux;
30+
import org.apache.hadoop.yarn.exceptions.YarnException;
31+
import org.apache.hadoop.yarn.util.Clock;
32+
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;
33+
import org.apache.hadoop.yarn.util.SystemClock;
34+
35+
import java.io.IOException;
36+
import java.math.BigInteger;
37+
import java.nio.charset.StandardCharsets;
38+
import java.nio.file.Files;
39+
import java.nio.file.Path;
40+
import java.nio.file.Paths;
41+
import java.util.List;
42+
import java.util.Map;
43+
import java.util.concurrent.ConcurrentHashMap;
44+
import java.util.stream.Collectors;
45+
import java.util.stream.Stream;
46+
47+
/**
48+
* A CGroupV2 file-system based Resource calculator without the process tree features.
49+
*
50+
* The feature only works if cluster runs in pure V2 version, because when we read the
51+
* /proc/{pid}/cgroup file currently we can not handle multiple lines.
52+
*/
53+
public class CGroupsV2ResourceCalculator extends ResourceCalculatorProcessTree {
54+
private static final Logger LOG = LoggerFactory.getLogger(CGroupsV2ResourceCalculator.class);
55+
private final Map<String, String> stats = new ConcurrentHashMap<>();
56+
private final Clock clock = SystemClock.getInstance();
57+
private final String pid;
58+
59+
@VisibleForTesting
60+
long jiffyLengthMs = SysInfoLinux.JIFFY_LENGTH_IN_MILLIS;
61+
@VisibleForTesting
62+
CpuTimeTracker cpuTimeTracker;
63+
64+
/**
65+
* Create resource calculator for the container that has the specified pid.
66+
* @param pid A pid from the cgroup or null for all containers
67+
*/
68+
public CGroupsV2ResourceCalculator(String pid) {
69+
super(pid);
70+
this.pid = pid;
71+
}
72+
73+
@Override
74+
public void initialize() throws YarnException {
75+
jiffyLengthMs = SysInfoLinux.JIFFY_LENGTH_IN_MILLIS;
76+
cpuTimeTracker = new CpuTimeTracker(jiffyLengthMs);
77+
}
78+
79+
@Override
80+
public float getCpuUsagePercent() {
81+
return cpuTimeTracker.getCpuTrackerUsagePercent();
82+
}
83+
84+
@Override
85+
public long getCumulativeCpuTime() {
86+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#cpu-interface-files
87+
return jiffyLengthMs < 0
88+
? UNAVAILABLE
89+
: getStat("cpu.stat#usage_usec") * jiffyLengthMs;
90+
}
91+
92+
@Override
93+
public long getRssMemorySize(int olderThanAge) {
94+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#memory-interface-files
95+
return 1 < olderThanAge
96+
? UNAVAILABLE
97+
: getStat("memory.stat#anon");
98+
}
99+
100+
@Override
101+
public long getVirtualMemorySize(int olderThanAge) {
102+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#memory-interface-files
103+
return 1 < olderThanAge
104+
? UNAVAILABLE
105+
: getStat("memory.stat#vmalloc");
106+
}
107+
108+
@Override
109+
public String getProcessTreeDump() {
110+
// We do not have a process tree in cgroups return just the pid for tracking
111+
return pid;
112+
}
113+
114+
@Override
115+
public boolean checkPidPgrpidForMatch() {
116+
// We do not have a process tree in cgroups returning default ok
117+
return true;
118+
}
119+
120+
@Override
121+
public void updateProcessTree() {
122+
try (Stream<Path> cGroupFiles = Files.list(getCGroupPath())){
123+
List<Path> statFiles = cGroupFiles
124+
.filter(path -> path.endsWith(".stat"))
125+
.collect(Collectors.toList());
126+
for (Path statFile : statFiles) {
127+
String[] lines = fileToString(statFile).split(System.lineSeparator());
128+
for (String line: lines) {
129+
String[] parts = line.split(" ");
130+
stats.put(statFile.getFileName() + "#" + parts[0], parts[1]);
131+
}
132+
}
133+
cpuTimeTracker.updateElapsedJiffies(
134+
BigInteger.valueOf(getStat("cpu.stat#usage_usec")), clock.getTime());
135+
LOG.debug("The {} process has the following stat properties updated: {}", pid, stats);
136+
} catch (Exception e) {
137+
LOG.warn("Failed to read CGroupV2 stats for process: " + pid, e);
138+
}
139+
}
140+
141+
private Path getCGroupPath() throws IOException {
142+
String relativePath = pid == null
143+
? ResourceHandlerModule.getCGroupsHandler().getRelativePathForCGroup("")
144+
: getCGroupRelativePathForPid();
145+
return Paths.get(ResourceHandlerModule.getCGroupsHandler().getCGroupMountPath(), relativePath);
146+
}
147+
148+
private String getCGroupRelativePathForPid() throws IOException {
149+
// https://docs.kernel.org/admin-guide/cgroup-v2.html#processes
150+
String pidCGroupsFile = fileToString(Paths.get("/proc", pid, "cgroup"));
151+
String controllerPath = StringUtils.substringAfterLast(pidCGroupsFile, ":");
152+
return ResourceHandlerModule.getCGroupsHandler().getRelativePathForCGroup(controllerPath);
153+
}
154+
155+
private long getStat(String key) {
156+
return Long.parseLong(stats.getOrDefault(key, "0"));
157+
}
158+
159+
private String fileToString(Path path) throws IOException {
160+
return FileUtils.readFileToString(path.toFile(), StandardCharsets.UTF_8);
161+
}
162+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/**
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.hadoop.yarn.server.nodemanager.containermanager.linux.resources;
20+
21+
import org.junit.Test;
22+
23+
import org.apache.hadoop.util.CpuTimeTracker;
24+
25+
import static org.mockito.ArgumentMatchers.any;
26+
import static org.mockito.ArgumentMatchers.anyLong;
27+
import static org.mockito.Mockito.mock;
28+
import static org.mockito.Mockito.never;
29+
import static org.mockito.Mockito.verify;
30+
31+
/**
32+
* Unit test for CGroupsV2ResourceCalculator.
33+
*/
34+
public class TestCGroupsV2ResourceCalculator {
35+
36+
@Test
37+
public void testPidNotFound() {
38+
CGroupsV2ResourceCalculator tested = createCalculator();
39+
tested.updateProcessTree();
40+
verify(tested.cpuTimeTracker, never()).updateElapsedJiffies(any(), anyLong());
41+
}
42+
43+
private CGroupsV2ResourceCalculator createCalculator() {
44+
CGroupsV2ResourceCalculator calculator = new CGroupsV2ResourceCalculator("1");
45+
calculator.cpuTimeTracker = mock(CpuTimeTracker.class);
46+
return calculator;
47+
}
48+
}

0 commit comments

Comments
 (0)