Skip to content

Commit 30469c7

Browse files
authored
Merge pull request #11173 from swiftlang/wasm-cherrypicks
🍒 WebAssembly symbol table parsing cherrypicks
2 parents 74320d1 + a62585c commit 30469c7

File tree

7 files changed

+476
-12
lines changed

7 files changed

+476
-12
lines changed

lldb/include/lldb/lldb-enumerations.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,7 @@ enum SectionType {
788788
eSectionTypeLLDBTypeSummaries,
789789
eSectionTypeLLDBFormatters,
790790
eSectionTypeSwiftModules,
791+
eSectionTypeWasmName,
791792
};
792793

793794
FLAGS_ENUM(EmulateInstructionOptions){

lldb/source/Core/Section.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ const char *Section::GetTypeAsCString() const {
155155
return "lldb-formatters";
156156
case eSectionTypeSwiftModules:
157157
return "swift-modules";
158+
case eSectionTypeWasmName:
159+
return "wasm-name";
158160
case eSectionTypeOther:
159161
return "regular";
160162
}
@@ -417,6 +419,7 @@ bool Section::ContainsOnlyDebugInfo() const {
417419
case eSectionTypeCompactUnwind:
418420
case eSectionTypeGoSymtab:
419421
case eSectionTypeAbsoluteAddress:
422+
case eSectionTypeWasmName:
420423
case eSectionTypeOther:
421424
// Used for "__dof_cache" in mach-o or ".debug" for COFF which isn't debug
422425
// information that we parse at all. This was causing system files with no

lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,6 +1159,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) {
11591159
case eSectionTypeDataObjCMessageRefs:
11601160
case eSectionTypeDataObjCCFStrings:
11611161
case eSectionTypeGoSymtab:
1162+
case eSectionTypeWasmName:
11621163
return AddressClass::eData;
11631164

11641165
case eSectionTypeDebug:

lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp

Lines changed: 207 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/StringRef.h"
2323
#include "llvm/BinaryFormat/Magic.h"
2424
#include "llvm/BinaryFormat/Wasm.h"
25+
#include "llvm/Support/CheckedArithmetic.h"
2526
#include "llvm/Support/Endian.h"
2627
#include "llvm/Support/Format.h"
2728
#include <optional>
@@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
5051
return version == llvm::wasm::WasmVersion;
5152
}
5253

53-
static std::optional<ConstString>
54+
// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
55+
static std::optional<std::string>
5456
GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
5557
// A Wasm string is encoded as a vector of UTF-8 codes.
5658
// Vectors are encoded with their u32 length followed by the element
@@ -61,7 +63,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
6163
return std::nullopt;
6264
}
6365

64-
if (len >= (uint64_t(1) << 32)) {
66+
if (len > std::numeric_limits<uint32_t>::max()) {
6567
return std::nullopt;
6668
}
6769

@@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
7274
return std::nullopt;
7375
}
7476

75-
llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage));
76-
return ConstString(str);
77+
return std::string(toStringRef(llvm::ArrayRef(str_storage)));
7778
}
7879

7980
char ObjectFileWasm::ID;
@@ -174,15 +175,15 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
174175
if (!c)
175176
return !llvm::errorToBool(c.takeError());
176177

177-
if (payload_len >= (uint64_t(1) << 32))
178+
if (payload_len > std::numeric_limits<uint32_t>::max())
178179
return false;
179180

180181
if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
181182
// Custom sections have the id 0. Their contents consist of a name
182183
// identifying the custom section, followed by an uninterpreted sequence
183184
// of bytes.
184185
lldb::offset_t prev_offset = c.tell();
185-
std::optional<ConstString> sect_name = GetWasmString(data, c);
186+
std::optional<std::string> sect_name = GetWasmString(data, c);
186187
if (!sect_name)
187188
return false;
188189

@@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
191192

192193
uint32_t section_length = payload_len - (c.tell() - prev_offset);
193194
m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
194-
section_id, *sect_name});
195+
section_id, ConstString(*sect_name)});
195196
*offset_ptr += (c.tell() + section_length);
196197
} else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197198
m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
@@ -248,12 +249,203 @@ bool ObjectFileWasm::ParseHeader() {
248249
return true;
249250
}
250251

251-
void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
252+
/// Parse the Wasm code section into one address range per function body.
///
/// The section data is read as a ULEB128 function count followed, for each
/// function, by a ULEB128 body size and then the body itself. The returned
/// ranges are offsets into \p code_section_sp and appear in section order, so
/// the position in the vector is the position of the function in the section.
///
/// \param code_section_sp
///     The code section whose contents are parsed.
///
/// \return
///     One range per function body, or an error if the count or a size does
///     not fit in 32 bits, the count cannot possibly fit in the section, or
///     advancing the offset overflows.
static llvm::Expected<std::vector<AddressRange>>
ParseFunctions(SectionSP code_section_sp) {
  DataExtractor data;
  code_section_sp->GetSectionData(data);
  lldb::offset_t offset = 0;

  const uint64_t function_count = data.GetULEB128(&offset);
  if (function_count > std::numeric_limits<uint32_t>::max())
    return llvm::createStringError("function count overflows uint32_t");

  // Every function occupies at least the one byte that encodes its size, so a
  // count larger than the number of bytes left can only come from a malformed
  // section. Rejecting it here keeps the reserve() below from attempting a
  // huge allocation driven by untrusted input.
  if (function_count > data.BytesLeft(offset))
    return llvm::createStringError(
        "function count exceeds the size of the code section");

  std::vector<AddressRange> functions;
  functions.reserve(function_count);

  for (uint32_t i = 0; i < function_count; ++i) {
    const uint64_t function_size = data.GetULEB128(&offset);
    if (function_size > std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("function size overflows uint32_t");
    // llvm-objdump considers the ULEB with the function size to be part of the
    // function. We can't do that here because that would break symbolic
    // breakpoints, as that address is never executed.
    functions.emplace_back(code_section_sp, offset, function_size);

    // Step over the body to reach the size field of the next function.
    std::optional<lldb::offset_t> next_offset =
        llvm::checkedAddUnsigned(offset, function_size);
    if (!next_offset)
      return llvm::createStringError("function offset overflows uint64_t");
    offset = *next_offset;
  }

  return functions;
}
283+
284+
/// Parse the Wasm data section into one address range per data segment.
///
/// The section data is read as a ULEB128 segment count followed, for each
/// segment, by ULEB128 flags, a ULEB128 size and then the segment bytes. The
/// returned ranges are offsets into \p data_section_sp and appear in section
/// order.
///
/// NOTE(review): only the flags and size fields are decoded per segment. The
/// WebAssembly binary format appears to place a memory index and/or an offset
/// expression between them for some segment kinds -- confirm against the
/// specification and extend this parser if so.
///
/// \param data_section_sp
///     The data section whose contents are parsed.
///
/// \return
///     One range per segment, or an error if the count, flags or a size does
///     not fit in 32 bits, the count cannot possibly fit in the section, or
///     advancing the offset overflows.
static llvm::Expected<std::vector<AddressRange>>
ParseData(SectionSP data_section_sp) {
  DataExtractor data;
  data_section_sp->GetSectionData(data);

  lldb::offset_t offset = 0;

  const uint64_t segment_count = data.GetULEB128(&offset);
  if (segment_count > std::numeric_limits<uint32_t>::max())
    return llvm::createStringError("segment count overflows uint32_t");

  // Each segment takes at least one byte, so a count larger than the number
  // of bytes left can only come from a malformed section. Rejecting it here
  // keeps the reserve() below from attempting a huge allocation driven by
  // untrusted input.
  if (segment_count > data.BytesLeft(offset))
    return llvm::createStringError(
        "segment count exceeds the size of the data section");

  std::vector<AddressRange> segments;
  segments.reserve(segment_count);

  for (uint32_t i = 0; i < segment_count; ++i) {
    const uint64_t flags = data.GetULEB128(&offset);
    if (flags > std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("segment flags overflows uint32_t");

    // Validate the size that was just read; re-checking `flags` here would
    // let an oversized segment size through unnoticed.
    const uint64_t segment_size = data.GetULEB128(&offset);
    if (segment_size > std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("segment size overflows uint32_t");

    segments.emplace_back(data_section_sp, offset, segment_size);

    // Step over the segment contents to reach the next segment header.
    std::optional<lldb::offset_t> next_offset =
        llvm::checkedAddUnsigned(offset, segment_size);
    if (!next_offset)
      return llvm::createStringError("segment offset overflows uint64_t");
    offset = *next_offset;
  }

  return segments;
}
318+
319+
/// Build symbols from the subsections of the Wasm "name" section.
///
/// Each subsection starts with a one-byte type and a ULEB128 size. Function
/// and data-segment name subsections hold a ULEB128 entry count followed by
/// (index, name) pairs; each pair whose index is within the matching range
/// vector becomes a code or data symbol covering that range. Every other
/// subsection type is skipped by seeking past its encoded size.
///
/// \param name_section_sp
///     The name section whose contents are parsed.
/// \param function_ranges
///     Function ranges, indexed by the function index stored with each name.
/// \param segment_ranges
///     Data segment ranges, indexed by the segment index stored with each
///     name.
///
/// \return
///     The symbols in the order they were read, or an error if a size or
///     count does not fit in 32 bits, a skip offset overflows, or the cursor
///     failed while reading.
static llvm::Expected<std::vector<Symbol>>
ParseNames(SectionSP name_section_sp,
           const std::vector<AddressRange> &function_ranges,
           const std::vector<AddressRange> &segment_ranges) {
  DataExtractor section_contents;
  name_section_sp->GetSectionData(section_contents);

  llvm::DataExtractor data = section_contents.GetAsLLVM();
  llvm::DataExtractor::Cursor c(0);
  std::vector<Symbol> symbols;

  // Reads one name map (entry count, then index/name pairs) at the cursor and
  // appends a symbol of the given type for every entry that has a readable
  // name and an index inside `ranges`. Returns false only when the entry
  // count does not fit in 32 bits; cursor failures are left in `c`.
  auto read_name_map = [&](const std::vector<AddressRange> &ranges,
                           lldb::SymbolType symbol_type) -> bool {
    const uint64_t entry_count = data.getULEB128(c);
    if (entry_count > std::numeric_limits<uint32_t>::max())
      return false;

    for (uint64_t entry = 0; c && entry < entry_count; ++entry) {
      const uint64_t index = data.getULEB128(c);
      const std::optional<std::string> name = GetWasmString(data, c);
      if (name && index < ranges.size()) {
        symbols.emplace_back(
            symbols.size(), Mangled(*name), symbol_type,
            /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
            /*is_artificial=*/false, ranges[index],
            /*size_is_valid=*/true, /*contains_linker_annotations=*/false,
            /*flags=*/0);
      }
    }
    return true;
  };

  while (c && c.tell() < data.size()) {
    const uint8_t subsection_type = data.getU8(c);
    const uint64_t subsection_size = data.getULEB128(c);
    if (subsection_size > std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("size overflows uint32_t");

    if (subsection_type == llvm::wasm::WASM_NAMES_FUNCTION) {
      if (!read_name_map(function_ranges, lldb::eSymbolTypeCode))
        return llvm::createStringError("function count overflows uint32_t");
      continue;
    }

    if (subsection_type == llvm::wasm::WASM_NAMES_DATA_SEGMENT) {
      if (!read_name_map(segment_ranges, lldb::eSymbolTypeData))
        return llvm::createStringError("data count overflows uint32_t");
      continue;
    }

    // Globals, locals and any unrecognized subsection carry nothing that is
    // turned into a symbol here: jump straight over the payload.
    const std::optional<uint64_t> skip_to =
        llvm::checkedAddUnsigned(c.tell(), subsection_size);
    if (!skip_to)
      return llvm::createStringError("offset overflows uint64_t");
    c.seek(*skip_to);
  }

  // A read that ran off the end of the section leaves its error in the
  // cursor; surface it instead of returning a partial table.
  if (!c)
    return c.takeError();

  return symbols;
}
387+
388+
/// Populate \p symtab from the Wasm code, data and name sections.
///
/// The name section refers to functions and data segments by index, so the
/// code and data sections (when present) are parsed first to obtain the
/// address range behind each index. Any parse failure, or a missing name
/// section, is logged to the object log channel and leaves \p symtab without
/// symbols from this function.
///
/// \param symtab
///     The symbol table that receives the parsed symbols and is finalized on
///     success.
void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
  assert(m_sections_up && "sections must be parsed");
  Log *log = GetLog(LLDBLog::Object);

  // The name section contains names and indexes. First parse the data from the
  // relevant sections so we can access it by its index.
  std::vector<AddressRange> function_ranges;
  std::vector<AddressRange> segment_ranges;

  // Parse the code section.
  if (SectionSP code_section_sp =
          m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false)) {
    llvm::Expected<std::vector<AddressRange>> functions =
        ParseFunctions(code_section_sp);
    if (!functions) {
      LLDB_LOG_ERROR(log, functions.takeError(),
                     "Failed to parse Wasm code section: {0}");
      return;
    }
    // Take over the parsed storage instead of copying every range; the
    // Expected goes out of scope right after this.
    function_ranges.swap(*functions);
  }

  // Parse the data section.
  if (SectionSP data_section_sp =
          m_sections_up->FindSectionByType(lldb::eSectionTypeData, false)) {
    llvm::Expected<std::vector<AddressRange>> segments =
        ParseData(data_section_sp);
    if (!segments) {
      LLDB_LOG_ERROR(log, segments.takeError(),
                     "Failed to parse Wasm data section: {0}");
      return;
    }
    // Same as above: steal the vector rather than deep-copying it.
    segment_ranges.swap(*segments);
  }

  // Parse the name section.
  SectionSP name_section_sp =
      m_sections_up->FindSectionByType(lldb::eSectionTypeWasmName, false);
  if (!name_section_sp) {
    LLDB_LOG(log, "Failed to parse Wasm symbol table: no names section");
    return;
  }

  llvm::Expected<std::vector<Symbol>> symbols =
      ParseNames(name_section_sp, function_ranges, segment_ranges);
  if (!symbols) {
    LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
    return;
  }

  for (const Symbol &symbol : *symbols)
    symtab.AddSymbol(symbol);

  symtab.Finalize();
}
252443

253444
static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
254-
if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) {
445+
if (Name == "name")
446+
return lldb::eSectionTypeWasmName;
447+
if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_"))
255448
return ObjectFile::GetDWARFSectionTypeFromName(Name);
256-
}
257449
return eSectionTypeOther;
258450
}
259451

@@ -283,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
283475
// For this reason Section::GetFileAddress() must return zero for the
284476
// Code section.
285477
vm_addr = 0;
478+
} else if (llvm::wasm::WASM_SEC_DATA == sect_info.id) {
479+
section_type = eSectionTypeData;
480+
section_name = ConstString("data");
286481
} else {
287482
section_type = GetSectionTypeFromName(sect_info.name.GetStringRef());
288483
if (section_type == eSectionTypeOther)
@@ -397,9 +592,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
397592
ReadImageData(sect_info.offset, kBufferSize);
398593
llvm::DataExtractor data = section_header_data.GetAsLLVM();
399594
llvm::DataExtractor::Cursor c(0);
400-
std::optional<ConstString> symbols_url = GetWasmString(data, c);
595+
std::optional<std::string> symbols_url = GetWasmString(data, c);
401596
if (symbols_url)
402-
return FileSpec(symbols_url->GetStringRef());
597+
return FileSpec(*symbols_url);
403598
}
404599
}
405600
return std::nullopt;

lldb/source/Symbol/ObjectFile.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) {
379379
case eSectionTypeELFDynamicSymbols:
380380
case eSectionTypeELFRelocationEntries:
381381
case eSectionTypeELFDynamicLinkInfo:
382+
case eSectionTypeWasmName:
382383
case eSectionTypeOther:
383384
return AddressClass::eUnknown;
384385
case eSectionTypeAbsoluteAddress:

0 commit comments

Comments
 (0)