Skip to content

Commit bdd18a5

Browse files
committed
build: speedup compilation of mksnapshot output
Incremental compilation of Node.js is slow. Currently on a powerful Linux machine, it takes about 5.8 seconds to compile `gen/node_snapshot.cc` with g++.

As in the previous PR which dealt with `node_js2c`, we add a new flag `--use-string-literals` to `node_mksnapshot`. When this flag is set, we emit string literals instead of array literals for the snapshot blob and for the code cache, i.e.:

```c++
// old:
static const uint8_t X[] = { ... };

// new:
static const uint8_t *X = "...";
```

I only enabled the new flag on Linux/macOS, since those are systems that I have available for testing. On my Linux system with gcc, it speeds up compilation of this file by 3.7s (5.8s -> 2.1s). On my Mac system with clang, it speeds up compilation by 1.7s (3.4s -> 1.7s).

Again, the right thing here is probably to generate separate files for the snapshot blob and for each code cache output, but this is a nice intermediate speedup.

The thing I'm most unsure about in this PR is how to actually thread the argument through. I considered adding it to the general argument parser, but that felt strange, since this flag only makes sense during the build process. So I kind of hacked it in, which also feels weird. Suggestions are very welcome.

Refs: #47984
Refs: #48160
1 parent 817c579 commit bdd18a5

File tree

4 files changed

+92
-25
lines changed

4 files changed

+92
-25
lines changed

node.gyp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,8 +650,16 @@
650650
],
651651
'action': [
652652
'<@(_inputs)',
653+
'<@(node_mksnapshot_use_string_literals_flag)',
653654
'<@(_outputs)',
654655
],
656+
'conditions': [
657+
['OS=="linux" or OS=="mac"', {
658+
'variables': {'node_mksnapshot_use_string_literals_flag': ['--use-string-literals']},
659+
}, {
660+
'variables': {'node_mksnapshot_use_string_literals_flag': []},
661+
}],
662+
],
655663
},
656664
],
657665
}],

src/node_snapshot_builder.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,8 @@ class NODE_EXTERN_PRIVATE SnapshotBuilder {
1818
public:
1919
static ExitCode Generate(std::ostream& out,
2020
const std::vector<std::string> args,
21-
const std::vector<std::string> exec_args);
21+
const std::vector<std::string> exec_args,
22+
bool use_string_literals);
2223

2324
// Generate the snapshot into out.
2425
static ExitCode Generate(SnapshotData* out,

src/node_snapshotable.cc

Lines changed: 70 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -742,23 +742,61 @@ static std::string FormatSize(size_t size) {
742742
return buf;
743743
}
744744

745-
static void WriteStaticCodeCacheData(std::ostream* ss,
746-
const builtins::CodeCacheInfo& info) {
745+
static void WriteDataAsCharString(std::ostream* ss,
746+
const uint8_t* data,
747+
size_t length) {
748+
for (size_t i = 0; i < length; i++) {
749+
const uint8_t ch = data[i];
750+
// We can print most printable characters directly. The exceptions are '\'
751+
// (escape characters), " (would end the string), and ? (trigraphs). The
752+
// latter may be overly conservative: we compile with C++17 which doesn't
753+
// support trigraphs.
754+
if (ch >= ' ' && ch <= '~' && ch != '\\' && ch != '"' && ch != '?') {
755+
*ss << ch;
756+
} else {
757+
// All other characters are blindly output as octal.
758+
const char c0 = '0' + ((ch & 0700) >> 6);
759+
const char c1 = '0' + ((ch & 0070) >> 3);
760+
const char c2 = '0' + (ch & 7);
761+
*ss << "\\" << c0 << c1 << c2;
762+
}
763+
if (i % 64 == 63) {
764+
// Go to a newline every 64 bytes since many text editors have
765+
// problems with very long lines.
766+
*ss << "\"\n\"";
767+
}
768+
}
769+
}
770+
771+
static void WriteStaticCodeCacheDataAsArray(
772+
std::ostream* ss, const builtins::CodeCacheInfo& info) {
747773
*ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n";
748774
WriteVector(ss, info.data.data, info.data.length);
749-
*ss << "};";
775+
*ss << "};\n";
776+
}
777+
778+
static void WriteStaticCodeCacheDataAsStringLiteral(
779+
std::ostream* ss, const builtins::CodeCacheInfo& info) {
780+
*ss << "static const uint8_t *" << GetCodeCacheDefName(info.id)
781+
<< "= reinterpret_cast<const uint8_t *>(\"";
782+
WriteDataAsCharString(ss, info.data.data, info.data.length);
783+
*ss << "\");\n";
750784
}
751785

752-
static void WriteCodeCacheInitializer(std::ostream* ss, const std::string& id) {
786+
static void WriteCodeCacheInitializer(std::ostream* ss,
787+
const std::string& id,
788+
size_t size) {
753789
std::string def_name = GetCodeCacheDefName(id);
754790
*ss << " { \"" << id << "\",\n";
755791
*ss << " {" << def_name << ",\n";
756-
*ss << " arraysize(" << def_name << "),\n";
792+
*ss << " " << size << ",\n";
757793
*ss << " }\n";
758794
*ss << " },\n";
759795
}
760796

761-
void FormatBlob(std::ostream& ss, const SnapshotData* data) {
797+
void FormatBlob(std::ostream& ss,
798+
const SnapshotData* data,
799+
const bool use_string_literals) {
762800
ss << R"(#include <cstddef>
763801
#include "env.h"
764802
#include "node_snapshot_builder.h"
@@ -767,21 +805,32 @@ void FormatBlob(std::ostream& ss, const SnapshotData* data) {
767805
// This file is generated by tools/snapshot. Do not edit.
768806
769807
namespace node {
770-
771-
static const char v8_snapshot_blob_data[] = {
772808
)";
773-
WriteVector(&ss,
774-
data->v8_snapshot_blob_data.data,
775-
data->v8_snapshot_blob_data.raw_size);
776-
ss << R"(};
777-
778-
static const int v8_snapshot_blob_size = )"
809+
if (use_string_literals) {
810+
ss << R"(static const char *v8_snapshot_blob_data = ")";
811+
WriteDataAsCharString(
812+
&ss,
813+
reinterpret_cast<const uint8_t*>(data->v8_snapshot_blob_data.data),
814+
data->v8_snapshot_blob_data.raw_size);
815+
ss << R"(";)";
816+
} else {
817+
ss << R"(static const char v8_snapshot_blob_data[] = {)";
818+
WriteVector(&ss,
819+
data->v8_snapshot_blob_data.data,
820+
data->v8_snapshot_blob_data.raw_size);
821+
ss << R"(};)";
822+
}
823+
ss << R"(static const int v8_snapshot_blob_size = )"
779824
<< data->v8_snapshot_blob_data.raw_size << ";";
780825

781-
// Windows can't deal with too many large vector initializers.
782-
// Store the data into static arrays first.
783826
for (const auto& item : data->code_cache) {
784-
WriteStaticCodeCacheData(&ss, item);
827+
if (use_string_literals) {
828+
WriteStaticCodeCacheDataAsStringLiteral(&ss, item);
829+
} else {
830+
// Windows can't deal with too many large vector initializers.
831+
// Store the data into static arrays first.
832+
WriteStaticCodeCacheDataAsArray(&ss, item);
833+
}
785834
}
786835

787836
ss << R"(const SnapshotData snapshot_data {
@@ -808,7 +857,7 @@ static const int v8_snapshot_blob_size = )"
808857
// -- code_cache begins --
809858
{)";
810859
for (const auto& item : data->code_cache) {
811-
WriteCodeCacheInitializer(&ss, item.id);
860+
WriteCodeCacheInitializer(&ss, item.id, item.data.length);
812861
}
813862
ss << R"(
814863
}
@@ -1022,13 +1071,14 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out,
10221071

10231072
ExitCode SnapshotBuilder::Generate(std::ostream& out,
10241073
const std::vector<std::string> args,
1025-
const std::vector<std::string> exec_args) {
1074+
const std::vector<std::string> exec_args,
1075+
const bool use_string_literals) {
10261076
SnapshotData data;
10271077
ExitCode exit_code = Generate(&data, args, exec_args);
10281078
if (exit_code != ExitCode::kNoFailure) {
10291079
return exit_code;
10301080
}
1031-
FormatBlob(out, &data);
1081+
FormatBlob(out, &data, use_string_literals);
10321082
return exit_code;
10331083
}
10341084

tools/snapshot/node_mksnapshot.cc

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,23 @@ int main(int argc, char* argv[]) {
5858

5959
int BuildSnapshot(int argc, char* argv[]) {
6060
if (argc < 2) {
61-
std::cerr << "Usage: " << argv[0] << " <path/to/output.cc>\n";
61+
std::cerr << "Usage: " << argv[0]
62+
<< " [--use-string-literals] <path/to/output.cc>\n";
6263
std::cerr << " " << argv[0] << " --build-snapshot "
6364
<< "<path/to/script.js> <path/to/output.cc>\n";
6465
return 1;
6566
}
6667

68+
std::vector<std::string> args{argv, argv + argc};
69+
size_t size_before_remove = args.size();
70+
args.erase(std::remove(args.begin(), args.end(), "--use-string-literals"),
71+
args.end());
72+
// If the size of args changed, we must have removed a
73+
// "--use-string-literals".
74+
const bool use_string_literals = args.size() != size_before_remove;
75+
6776
std::unique_ptr<node::InitializationResult> result =
68-
node::InitializeOncePerProcess(
69-
std::vector<std::string>(argv, argv + argc));
77+
node::InitializeOncePerProcess(args);
7078

7179
CHECK(!result->early_return());
7280
CHECK_EQ(result->exit_code(), 0);
@@ -87,7 +95,7 @@ int BuildSnapshot(int argc, char* argv[]) {
8795
node::ExitCode exit_code = node::ExitCode::kNoFailure;
8896
{
8997
exit_code = node::SnapshotBuilder::Generate(
90-
out, result->args(), result->exec_args());
98+
out, result->args(), result->exec_args(), use_string_literals);
9199
if (exit_code == node::ExitCode::kNoFailure) {
92100
if (!out) {
93101
std::cerr << "Failed to write " << out_path << "\n";

0 commit comments

Comments
 (0)