Skip to content

Commit 9e63892

Browse files
committed
[lldb] Support parsing the Wasm symbol table (llvm#153093)
This PR adds support for parsing the WebAssembly symbol table. The symbol table is encoded in the custom "name" section and contains names together with indexes into other sections. For now we only support parsing function (code) symbols. The result is that you can set breakpoints by symbol name, whereas previously breakpoints by name required debug info (DWARF). This is also necessary for Swift, which checks for the presence of `swift_release` as a heuristic to determine if there's a static Swift stdlib. (cherry picked from commit 5be2063)
1 parent 74320d1 commit 9e63892

File tree

7 files changed

+358
-10
lines changed

7 files changed

+358
-10
lines changed

lldb/include/lldb/lldb-enumerations.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -788,6 +788,7 @@ enum SectionType {
788788
eSectionTypeLLDBTypeSummaries,
789789
eSectionTypeLLDBFormatters,
790790
eSectionTypeSwiftModules,
791+
eSectionTypeWasmName,
791792
};
792793

793794
FLAGS_ENUM(EmulateInstructionOptions){

lldb/source/Core/Section.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,8 @@ const char *Section::GetTypeAsCString() const {
155155
return "lldb-formatters";
156156
case eSectionTypeSwiftModules:
157157
return "swift-modules";
158+
case eSectionTypeWasmName:
159+
return "wasm-name";
158160
case eSectionTypeOther:
159161
return "regular";
160162
}
@@ -417,6 +419,7 @@ bool Section::ContainsOnlyDebugInfo() const {
417419
case eSectionTypeCompactUnwind:
418420
case eSectionTypeGoSymtab:
419421
case eSectionTypeAbsoluteAddress:
422+
case eSectionTypeWasmName:
420423
case eSectionTypeOther:
421424
// Used for "__dof_cache" in mach-o or ".debug" for COFF which isn't debug
422425
// information that we parse at all. This was causing system files with no

lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1159,6 +1159,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) {
11591159
case eSectionTypeDataObjCMessageRefs:
11601160
case eSectionTypeDataObjCCFStrings:
11611161
case eSectionTypeGoSymtab:
1162+
case eSectionTypeWasmName:
11621163
return AddressClass::eData;
11631164

11641165
case eSectionTypeDebug:

lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp

Lines changed: 135 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/StringRef.h"
2323
#include "llvm/BinaryFormat/Magic.h"
2424
#include "llvm/BinaryFormat/Wasm.h"
25+
#include "llvm/Support/CheckedArithmetic.h"
2526
#include "llvm/Support/Endian.h"
2627
#include "llvm/Support/Format.h"
2728
#include <optional>
@@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
5051
return version == llvm::wasm::WasmVersion;
5152
}
5253

53-
static std::optional<ConstString>
54+
// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
55+
static std::optional<std::string>
5456
GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
5557
// A Wasm string is encoded as a vector of UTF-8 codes.
5658
// Vectors are encoded with their u32 length followed by the element
@@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
7274
return std::nullopt;
7375
}
7476

75-
llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage));
76-
return ConstString(str);
77+
return std::string(toStringRef(llvm::ArrayRef(str_storage)));
7778
}
7879

7980
char ObjectFileWasm::ID;
@@ -182,7 +183,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
182183
// identifying the custom section, followed by an uninterpreted sequence
183184
// of bytes.
184185
lldb::offset_t prev_offset = c.tell();
185-
std::optional<ConstString> sect_name = GetWasmString(data, c);
186+
std::optional<std::string> sect_name = GetWasmString(data, c);
186187
if (!sect_name)
187188
return false;
188189

@@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
191192

192193
uint32_t section_length = payload_len - (c.tell() - prev_offset);
193194
m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length,
194-
section_id, *sect_name});
195+
section_id, ConstString(*sect_name)});
195196
*offset_ptr += (c.tell() + section_length);
196197
} else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197198
m_sect_infos.push_back(section_info{*offset_ptr + c.tell(),
@@ -248,12 +249,136 @@ bool ObjectFileWasm::ParseHeader() {
248249
return true;
249250
}
250251

251-
void ObjectFileWasm::ParseSymtab(Symtab &symtab) {}
252+
/// Decode the function bodies stored in the Wasm code section.
///
/// The section data is read as a ULEB128 function count followed by, for each
/// function, a ULEB128 body size and then that many bytes of body.
///
/// \param code_section_sp
///     The code section; its contents are fetched with GetSectionData().
///
/// \return
///     One AddressRange per function, in section order, each covering the
///     function body only (the size prefix is excluded). An error is returned
///     if a count or size does not fit in 32 bits, or if the section data is
///     too short for what it claims to contain.
static llvm::Expected<std::vector<AddressRange>>
ParseFunctions(SectionSP code_section_sp) {
  DataExtractor code_section_data;
  code_section_sp->GetSectionData(code_section_data);
  lldb::offset_t offset = 0;

  const uint64_t function_count = code_section_data.GetULEB128(&offset);
  if (function_count >= std::numeric_limits<uint32_t>::max())
    return llvm::createStringError("function count overflows uint32_t");

  // Every function occupies at least one byte (its size prefix), so a count
  // larger than the remaining data is malformed. Rejecting it here also keeps
  // the reserve() below from allocating gigabytes on behalf of a few bytes of
  // corrupt input.
  if (function_count > code_section_data.BytesLeft(offset))
    return llvm::createStringError("function count exceeds code section size");

  std::vector<AddressRange> functions;
  functions.reserve(function_count);

  for (uint32_t i = 0; i < function_count; ++i) {
    // Reading past the end would silently produce a size of zero, so check
    // for truncation before decoding the size prefix.
    if (!code_section_data.ValidOffset(offset))
      return llvm::createStringError("code section is truncated");

    const uint64_t function_size = code_section_data.GetULEB128(&offset);
    if (function_size >= std::numeric_limits<uint32_t>::max())
      return llvm::createStringError("function size overflows uint32_t");

    // The body must lie entirely inside the section. This also guarantees that
    // advancing the offset below cannot overflow.
    if (function_size > code_section_data.BytesLeft(offset))
      return llvm::createStringError("function body exceeds code section size");

    // llvm-objdump considers the ULEB with the function size to be part of the
    // function. We can't do that here because that would break symbolic
    // breakpoints, as that address is never executed.
    functions.emplace_back(code_section_sp, offset, function_size);

    offset += function_size;
  }

  return functions;
}
283+
284+
static llvm::Expected<std::vector<Symbol>>
285+
ParseNames(SectionSP name_section_sp,
286+
const std::vector<AddressRange> &functions) {
287+
DataExtractor name_section_data;
288+
name_section_sp->GetSectionData(name_section_data);
289+
290+
llvm::DataExtractor data = name_section_data.GetAsLLVM();
291+
llvm::DataExtractor::Cursor c(0);
292+
std::vector<Symbol> symbols;
293+
while (c && c.tell() < data.size()) {
294+
const uint8_t type = data.getU8(c);
295+
const uint64_t size = data.getULEB128(c);
296+
if (size >= std::numeric_limits<uint32_t>::max())
297+
return llvm::createStringError("size overflows uint32_t");
298+
299+
switch (type) {
300+
case llvm::wasm::WASM_NAMES_FUNCTION: {
301+
const uint64_t count = data.getULEB128(c);
302+
if (count >= std::numeric_limits<uint32_t>::max())
303+
return llvm::createStringError("function count overflows uint32_t");
304+
305+
for (uint64_t i = 0; c && i < count; ++i) {
306+
const uint64_t idx = data.getULEB128(c);
307+
const std::optional<std::string> name = GetWasmString(data, c);
308+
if (!name || idx >= functions.size())
309+
continue;
310+
symbols.emplace_back(
311+
symbols.size(), Mangled(*name), lldb::eSymbolTypeCode,
312+
/*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false,
313+
/*is_artificial=*/false, functions[idx],
314+
/*size_is_valid=*/true, /*contains_linker_annotations=*/false,
315+
/*flags=*/0);
316+
}
317+
} break;
318+
case llvm::wasm::WASM_NAMES_DATA_SEGMENT:
319+
case llvm::wasm::WASM_NAMES_GLOBAL:
320+
case llvm::wasm::WASM_NAMES_LOCAL:
321+
default:
322+
std::optional<uint64_t> offset = llvm::checkedAddUnsigned(c.tell(), size);
323+
if (!offset)
324+
return llvm::createStringError("offset overflows uint64_t");
325+
c.seek(*offset);
326+
}
327+
}
328+
329+
if (!c)
330+
return c.takeError();
331+
332+
return symbols;
333+
}
334+
335+
void ObjectFileWasm::ParseSymtab(Symtab &symtab) {
336+
assert(m_sections_up && "sections must be parsed");
337+
Log *log = GetLog(LLDBLog::Object);
338+
339+
// The name section contains names and indexes. First parse the functions from
340+
// the code section so we can access them by their index.
341+
SectionSP code_section_sp =
342+
m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false);
343+
if (!code_section_sp) {
344+
LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section");
345+
return;
346+
}
347+
348+
llvm::Expected<std::vector<AddressRange>> functions =
349+
ParseFunctions(code_section_sp);
350+
if (!functions) {
351+
LLDB_LOG_ERROR(log, functions.takeError(),
352+
"Failed to parse Wasm functions: {0}");
353+
return;
354+
}
355+
356+
// Parse the name section.
357+
SectionSP name_section_sp =
358+
m_sections_up->FindSectionByType(lldb::eSectionTypeWasmName, false);
359+
if (!name_section_sp) {
360+
LLDB_LOG(log, "Failed to parse Wasm symbol table: no names section");
361+
return;
362+
}
363+
364+
llvm::Expected<std::vector<Symbol>> symbols =
365+
ParseNames(name_section_sp, *functions);
366+
if (!symbols) {
367+
LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}");
368+
return;
369+
}
370+
371+
for (const Symbol &symbol : *symbols)
372+
symtab.AddSymbol(symbol);
373+
374+
symtab.Finalize();
375+
}
252376

253377
/// Map a Wasm custom-section name to an LLDB section type.
///
/// The section called "name" maps to eSectionTypeWasmName. Names beginning
/// with ".debug_" or ".zdebug_" have that prefix removed and the remainder is
/// classified by ObjectFile::GetDWARFSectionTypeFromName(). Everything else
/// is eSectionTypeOther.
static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
  if (Name == "name")
    return lldb::eSectionTypeWasmName;

  // consume_front() strips the prefix in place when it matches, so on success
  // Name holds only the part after the prefix.
  const bool has_dwarf_prefix =
      Name.consume_front(".debug_") || Name.consume_front(".zdebug_");
  if (!has_dwarf_prefix)
    return eSectionTypeOther;

  return ObjectFile::GetDWARFSectionTypeFromName(Name);
}
259384

@@ -397,9 +522,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
397522
ReadImageData(sect_info.offset, kBufferSize);
398523
llvm::DataExtractor data = section_header_data.GetAsLLVM();
399524
llvm::DataExtractor::Cursor c(0);
400-
std::optional<ConstString> symbols_url = GetWasmString(data, c);
525+
std::optional<std::string> symbols_url = GetWasmString(data, c);
401526
if (symbols_url)
402-
return FileSpec(symbols_url->GetStringRef());
527+
return FileSpec(*symbols_url);
403528
}
404529
}
405530
return std::nullopt;

lldb/source/Symbol/ObjectFile.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) {
379379
case eSectionTypeELFDynamicSymbols:
380380
case eSectionTypeELFRelocationEntries:
381381
case eSectionTypeELFDynamicLinkInfo:
382+
case eSectionTypeWasmName:
382383
case eSectionTypeOther:
383384
return AddressClass::eUnknown;
384385
case eSectionTypeAbsoluteAddress:

0 commit comments

Comments
 (0)