Skip to content

Commit 7abf6bf

Browse files
mkustermann authored and commit-bot@chromium.org committed
[gardening] Add coredump archiving support to iso-stress builder.
This is an attempt to enable archiving of coredumps on the "iso-stress" builder, since we're often unable to reproduce crashes from that builder.

Issue #46823

TEST=Adds test infra.

Change-Id: I9b7276198db9a6c98a74f55d466bf832b03e24f8
Reviewed-on: https://dart-review.googlesource.com/c/sdk/+/214407
Reviewed-by: Alexander Thomas <[email protected]>
Reviewed-by: Slava Egorov <[email protected]>
Commit-Queue: Martin Kustermann <[email protected]>
1 parent 31afa1b commit 7abf6bf

File tree

3 files changed

+104
-13
lines changed

3 files changed

+104
-13
lines changed

runtime/tests/concurrency/run_stress_test_shards.dart

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import 'package:path/path.dart' as path;
1212

1313
import '../vm/dart/snapshot_test_helper.dart';
1414

15+
int crashCounter = 0;
16+
1517
void forwardStream(Stream<List<int>> input, IOSink output) {
1618
// Print the information line-by-line.
1719
input
@@ -22,22 +24,33 @@ void forwardStream(Stream<List<int>> input, IOSink output) {
2224
});
2325
}
2426

25-
Future<bool> run(String executable, List<String> args) async {
27+
/// Describes a subprocess run that failed and may therefore have crashed.
///
/// Instances are collected while tests run and later serialized, one per
/// line, into the unexpected-crashes file.
class PotentialCrash {
  PotentialCrash(this.test, this.pid, this.binaries);

  /// Label identifying the failed run (written as the first field).
  final String test;

  /// Process id of the failed subprocess.
  final int pid;

  /// Paths of the binaries that took part in the failed run.
  final List<String> binaries;
}
33+
34+
/// Runs [executable] with [args], forwarding its stdout and stderr to this
/// process, and completes with whether it exited with code 0.
///
/// On a non-zero exit code the run is recorded in [crashes] as a
/// [PotentialCrash] labelled `crash-<n>` (where `<n>` comes from the global
/// `crashCounter`), and this process' own exit code is set to 255.
Future<bool> run(
    String executable, List<String> args, List<PotentialCrash> crashes) async {
  final commandLine = '$executable ${args.join(' ')}';
  print('Running "$commandLine"');

  final child = await Process.start(executable, args);
  forwardStream(child.stdout, stdout);
  forwardStream(child.stderr, stderr);

  final status = await child.exitCode;
  if (status == 0) return true;

  // Non-zero exit: remember the pid and binary so a coredump can be matched
  // to this run later.
  final crashId = crashCounter++;
  print('=> Running "$commandLine" failed with $status');
  print('=> Possible crash $crashId (pid: ${child.pid})');
  crashes.add(PotentialCrash('crash-$crashId', child.pid, [executable]));
  io.exitCode = 255; // Make this shard fail.
  return false;
}
3851

3952
abstract class TestRunner {
40-
Future runTest();
53+
Future runTest(List<PotentialCrash> crashes);
4154
}
4255

4356
class JitTestRunner extends TestRunner {
@@ -46,8 +59,8 @@ class JitTestRunner extends TestRunner {
4659

4760
JitTestRunner(this.buildDir, this.arguments);
4861

49-
Future runTest() async {
50-
await run('$buildDir/dart', arguments);
62+
Future runTest(List<PotentialCrash> crashes) async {
63+
await run('$buildDir/dart', arguments, crashes);
5164
}
5265
}
5366

@@ -58,19 +71,62 @@ class AotTestRunner extends TestRunner {
5871

5972
AotTestRunner(this.buildDir, this.arguments, this.aotArguments);
6073

61-
Future runTest() async {
74+
Future runTest(List<PotentialCrash> crashes) async {
6275
await withTempDir((String dir) async {
6376
final elfFile = path.join(dir, 'app.elf');
6477

65-
if (await run('$buildDir/gen_snapshot',
66-
['--snapshot-kind=app-aot-elf', '--elf=$elfFile', ...arguments])) {
67-
await run(
68-
'$buildDir/dart_precompiled_runtime', [...aotArguments, elfFile]);
78+
if (await run(
79+
'$buildDir/gen_snapshot',
80+
['--snapshot-kind=app-aot-elf', '--elf=$elfFile', ...arguments],
81+
crashes)) {
82+
await run('$buildDir/dart_precompiled_runtime',
83+
[...aotArguments, elfFile], crashes);
6984
}
7085
});
7186
}
7287
}
7388

89+
/// Produces a name that tools/utils.py:BaseCoredumpArchiver supports.
///
/// [binary] is split on [Platform.pathSeparator]. Its second component is
/// taken as the build directory (for example `ReleaseX64`) and must start with
/// `Release`, `Debug` or `Product`. The result has the form
/// `binary.<mode>_<arch>_<name>`, where `<mode>` is the lower-cased prefix,
/// `<arch>` is the rest of the build directory name and `<name>` is the
/// remaining path components joined with `__`.
///
/// Throws an [ArgumentError] if [binary] has no build directory component or
/// if the build directory does not start with one of the known modes.
String getArchiveName(String binary) {
  const modePrefixes = ['Release', 'Debug', 'Product'];

  final parts = binary.split(Platform.pathSeparator);
  if (parts.length < 2) {
    throw ArgumentError.value(binary, 'binary',
        'Expected a path of the form <out>/<ModeArch>/<binary>');
  }

  final buildDir = parts[1];
  for (final prefix in modePrefixes) {
    if (buildDir.startsWith(prefix)) {
      final mode = prefix.toLowerCase();
      final arch = buildDir.substring(prefix.length);
      final name = parts.skip(2).join('__');
      return 'binary.${mode}_${arch}_$name';
    }
  }

  // Fail with a message naming the offending path instead of tripping over an
  // unassigned `late` variable.
  throw ArgumentError.value(binary, 'binary',
      'Build directory "$buildDir" does not start with one of $modePrefixes');
}
104+
105+
void writeUnexpectedCrashesFile(List<PotentialCrash> crashes) {
106+
// The format of this file is:
107+
//
108+
// test-name,pid,binary-file1,binary-file2,...
109+
//
110+
const unexpectedCrashesFile = 'unexpected-crashes';
111+
112+
final buffer = StringBuffer();
113+
final Set<String> archivedBinaries = {};
114+
for (final crash in crashes) {
115+
buffer.write('${crash.test},${crash.pid}');
116+
for (final binary in crash.binaries) {
117+
final archivedName = getArchiveName(binary);
118+
buffer.write(',$archivedName');
119+
if (!archivedBinaries.contains(archivedName)) {
120+
File(binary).copySync(archivedName);
121+
archivedBinaries.add(archivedName);
122+
}
123+
}
124+
buffer.writeln();
125+
}
126+
127+
File(unexpectedCrashesFile).writeAsStringSync(buffer.toString());
128+
}
129+
74130
const int tsanShards = 200;
75131

76132
final configurations = <TestRunner>[
@@ -120,11 +176,14 @@ main(List<String> arguments) async {
120176
..addOption('shards', help: 'number of shards used', defaultsTo: '1')
121177
..addOption('shard', help: 'shard id', defaultsTo: '1')
122178
..addOption('output-directory',
123-
help: 'unused parameter to make sharding infra work', defaultsTo: '');
179+
help: 'unused parameter to make sharding infra work', defaultsTo: '')
180+
..addFlag('copy-coredumps',
181+
help: 'whether to copy binaries for coredumps', defaultsTo: false);
124182

125183
final options = parser.parse(arguments);
126184
final shards = int.parse(options['shards']);
127185
final shard = int.parse(options['shard']) - 1;
186+
final copyCoredumps = options['copy-coredumps'] as bool;
128187

129188
// Tasks will eventually be killed if they do not have any output for some
130189
// time. So we'll explicitly print something every 4 minutes.
@@ -140,8 +199,12 @@ main(List<String> arguments) async {
140199
thisShardsConfigurations.add(configurations[i]);
141200
}
142201
}
202+
final crashes = <PotentialCrash>[];
143203
for (final config in thisShardsConfigurations) {
144-
await config.runTest();
204+
await config.runTest(crashes);
205+
}
206+
if (!crashes.isEmpty && copyCoredumps) {
207+
writeUnexpectedCrashesFile(crashes);
145208
}
146209
} finally {
147210
timer.cancel();

tools/bots/test_matrix.json

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3912,9 +3912,11 @@
39123912
},
39133913
{
39143914
"name": "Run Isolate Stress Tests",
3915-
"script": "out/ReleaseX64/dart",
3915+
"script": "tools/run_with_coredumps_enabled.py",
39163916
"arguments": [
3917-
"runtime/tests/concurrency/run_stress_test_shards.dart"
3917+
"out/ReleaseX64/dart",
3918+
"runtime/tests/concurrency/run_stress_test_shards.dart",
3919+
"--copy-coredumps"
39183920
],
39193921
"shards": 10,
39203922
"fileset": "vm-kernel"
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/env python3
2+
# Copyright (c) 2021, the Dart project authors. Please see the AUTHORS file
3+
# for details. All rights reserved. Use of this source code is governed by a
4+
# BSD-style license that can be found in the LICENSE file.
5+
6+
from contextlib import ExitStack
7+
import subprocess
8+
import sys
9+
10+
import utils
11+
12+
13+
def Main():
    """Runs the command given on the command line with coredump archiving.

    Every context manager yielded by utils.CoreDumpArchiver is entered before
    the command starts and exited after it has finished. The command's exit
    code is then passed to utils.DiagnoseExitCode and returned.
    """
    command = sys.argv[1:]

    with ExitStack() as archivers:
        for archiver in utils.CoreDumpArchiver(command):
            archivers.enter_context(archiver)
        status = subprocess.call(command)

    utils.DiagnoseExitCode(status, command)
    return status
23+
24+
25+
if __name__ == '__main__':
26+
sys.exit(Main())

0 commit comments

Comments
 (0)