From dde1ce462f1744b52bae0f4f73dcd41a29928bfc Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 11 Aug 2025 17:36:09 -0700 Subject: [PATCH 1/2] [lldb] Support parsing the Wasm symbol table #153093 This PR adds support for parsing the WebAssembly symbol table. The symbol table is encoded in the "name" custom section and contains names and indexes into other sections. For now we only support parsing function (code) symbols. The result is that you can set breakpoints by symbol name, while previously breakpoints by name required debug info (DWARF). This is also necessary for Swift, which checks for the presence of swift_release as a heuristic to determine if there's a static Swift stdlib. --- lldb/include/lldb/lldb-enumerations.h | 1 + lldb/source/Core/Section.cpp | 3 + .../ObjectFile/Mach-O/ObjectFileMachO.cpp | 1 + .../ObjectFile/wasm/ObjectFileWasm.cpp | 145 +++++++++++- lldb/source/Symbol/ObjectFile.cpp | 1 + .../test/Shell/Symtab/Inputs/simple.wasm.yaml | 210 ++++++++++++++++++ lldb/test/Shell/Symtab/symtab-wasm.test | 7 + 7 files changed, 358 insertions(+), 10 deletions(-) create mode 100644 lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml create mode 100644 lldb/test/Shell/Symtab/symtab-wasm.test diff --git a/lldb/include/lldb/lldb-enumerations.h b/lldb/include/lldb/lldb-enumerations.h index c63c1f03e58da..fec9fdef44df9 100644 --- a/lldb/include/lldb/lldb-enumerations.h +++ b/lldb/include/lldb/lldb-enumerations.h @@ -777,6 +777,7 @@ enum SectionType { eSectionTypeLLDBTypeSummaries, eSectionTypeLLDBFormatters, eSectionTypeSwiftModules, + eSectionTypeWasmName, }; FLAGS_ENUM(EmulateInstructionOptions){ diff --git a/lldb/source/Core/Section.cpp b/lldb/source/Core/Section.cpp index 27dcf987b0278..02d9d86fe5374 100644 --- a/lldb/source/Core/Section.cpp +++ b/lldb/source/Core/Section.cpp @@ -153,6 +153,8 @@ const char *Section::GetTypeAsCString() const { return "lldb-formatters"; case eSectionTypeSwiftModules: return "swift-modules"; + case eSectionTypeWasmName: + 
return "wasm-name"; case eSectionTypeOther: return "regular"; } @@ -415,6 +417,7 @@ bool Section::ContainsOnlyDebugInfo() const { case eSectionTypeCompactUnwind: case eSectionTypeGoSymtab: case eSectionTypeAbsoluteAddress: + case eSectionTypeWasmName: case eSectionTypeOther: // Used for "__dof_cache" in mach-o or ".debug" for COFF which isn't debug // information that we parse at all. This was causing system files with no diff --git a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp index 13df6e2f26b53..d7cb60e3f0c38 100644 --- a/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp +++ b/lldb/source/Plugins/ObjectFile/Mach-O/ObjectFileMachO.cpp @@ -1156,6 +1156,7 @@ AddressClass ObjectFileMachO::GetAddressClass(lldb::addr_t file_addr) { case eSectionTypeDataObjCMessageRefs: case eSectionTypeDataObjCCFStrings: case eSectionTypeGoSymtab: + case eSectionTypeWasmName: return AddressClass::eData; case eSectionTypeDebug: diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index b1efd25949379..6d0b68d5ca54c 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Magic.h" #include "llvm/BinaryFormat/Wasm.h" +#include "llvm/Support/CheckedArithmetic.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Format.h" #include @@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) { return version == llvm::wasm::WasmVersion; } -static std::optional +// FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor. +static std::optional GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { // A Wasm string is encoded as a vector of UTF-8 codes. 
// Vectors are encoded with their u32 length followed by the element @@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) { return std::nullopt; } - llvm::StringRef str = toStringRef(llvm::ArrayRef(str_storage)); - return ConstString(str); + return std::string(toStringRef(llvm::ArrayRef(str_storage))); } char ObjectFileWasm::ID; @@ -182,7 +183,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { // identifying the custom section, followed by an uninterpreted sequence // of bytes. lldb::offset_t prev_offset = c.tell(); - std::optional sect_name = GetWasmString(data, c); + std::optional sect_name = GetWasmString(data, c); if (!sect_name) return false; @@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) { uint32_t section_length = payload_len - (c.tell() - prev_offset); m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), section_length, - section_id, *sect_name}); + section_id, ConstString(*sect_name)}); *offset_ptr += (c.tell() + section_length); } else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) { m_sect_infos.push_back(section_info{*offset_ptr + c.tell(), @@ -248,12 +249,136 @@ bool ObjectFileWasm::ParseHeader() { return true; } -void ObjectFileWasm::ParseSymtab(Symtab &symtab) {} +static llvm::Expected> +ParseFunctions(SectionSP code_section_sp) { + DataExtractor code_section_data; + code_section_sp->GetSectionData(code_section_data); + lldb::offset_t offset = 0; + + const uint64_t function_count = code_section_data.GetULEB128(&offset); + if (function_count >= std::numeric_limits::max()) + return llvm::createStringError("function count overflows uint32_t"); + + std::vector functions; + functions.reserve(function_count); + + for (uint32_t i = 0; i < function_count; ++i) { + const uint64_t function_size = code_section_data.GetULEB128(&offset); + if (function_size >= std::numeric_limits::max()) + return llvm::createStringError("function size overflows 
uint32_t"); + // llvm-objdump considers the ULEB with the function size to be part of the + // function. We can't do that here because that would break symbolic + // breakpoints, as that address is never executed. + functions.emplace_back(code_section_sp, offset, function_size); + + std::optional next_offset = + llvm::checkedAddUnsigned(offset, function_size); + if (!next_offset) + return llvm::createStringError("offset overflows uint64_t"); + offset = *next_offset; + } + + return functions; +} + +static llvm::Expected> +ParseNames(SectionSP name_section_sp, + const std::vector &functions) { + DataExtractor name_section_data; + name_section_sp->GetSectionData(name_section_data); + + llvm::DataExtractor data = name_section_data.GetAsLLVM(); + llvm::DataExtractor::Cursor c(0); + std::vector symbols; + while (c && c.tell() < data.size()) { + const uint8_t type = data.getU8(c); + const uint64_t size = data.getULEB128(c); + if (size >= std::numeric_limits::max()) + return llvm::createStringError("size overflows uint32_t"); + + switch (type) { + case llvm::wasm::WASM_NAMES_FUNCTION: { + const uint64_t count = data.getULEB128(c); + if (count >= std::numeric_limits::max()) + return llvm::createStringError("function count overflows uint32_t"); + + for (uint64_t i = 0; c && i < count; ++i) { + const uint64_t idx = data.getULEB128(c); + const std::optional name = GetWasmString(data, c); + if (!name || idx >= functions.size()) + continue; + symbols.emplace_back( + symbols.size(), Mangled(*name), lldb::eSymbolTypeCode, + /*external=*/false, /*is_debug=*/false, /*is_trampoline=*/false, + /*is_artificial=*/false, functions[idx], + /*size_is_valid=*/true, /*contains_linker_annotations=*/false, + /*flags=*/0); + } + } break; + case llvm::wasm::WASM_NAMES_DATA_SEGMENT: + case llvm::wasm::WASM_NAMES_GLOBAL: + case llvm::wasm::WASM_NAMES_LOCAL: + default: + std::optional offset = llvm::checkedAddUnsigned(c.tell(), size); + if (!offset) + return llvm::createStringError("offset 
overflows uint64_t"); + c.seek(*offset); + } + } + + if (!c) + return c.takeError(); + + return symbols; +} + +void ObjectFileWasm::ParseSymtab(Symtab &symtab) { + assert(m_sections_up && "sections must be parsed"); + Log *log = GetLog(LLDBLog::Object); + + // The name section contains names and indexes. First parse the functions from + // the code section so we can access them by their index. + SectionSP code_section_sp = + m_sections_up->FindSectionByType(lldb::eSectionTypeCode, false); + if (!code_section_sp) { + LLDB_LOG(log, "Failed to parse Wasm symbol table: no functions section"); + return; + } + + llvm::Expected> functions = + ParseFunctions(code_section_sp); + if (!functions) { + LLDB_LOG_ERROR(log, functions.takeError(), + "Failed to parse Wasm functions: {0}"); + return; + } + + // Parse the name section. + SectionSP name_section_sp = + m_sections_up->FindSectionByType(lldb::eSectionTypeWasmName, false); + if (!name_section_sp) { + LLDB_LOG(log, "Failed to parse Wasm symbol table: no names section"); + return; + } + + llvm::Expected> symbols = + ParseNames(name_section_sp, *functions); + if (!symbols) { + LLDB_LOG_ERROR(log, symbols.takeError(), "Failed to parse Wasm names: {0}"); + return; + } + + for (const Symbol &symbol : *symbols) + symtab.AddSymbol(symbol); + + symtab.Finalize(); +} static SectionType GetSectionTypeFromName(llvm::StringRef Name) { - if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) { + if (Name == "name") + return lldb::eSectionTypeWasmName; + if (Name.consume_front(".debug_") || Name.consume_front(".zdebug_")) return ObjectFile::GetDWARFSectionTypeFromName(Name); - } return eSectionTypeOther; } @@ -397,9 +522,9 @@ std::optional ObjectFileWasm::GetExternalDebugInfoFileSpec() { ReadImageData(sect_info.offset, kBufferSize); llvm::DataExtractor data = section_header_data.GetAsLLVM(); llvm::DataExtractor::Cursor c(0); - std::optional symbols_url = GetWasmString(data, c); + std::optional symbols_url = 
GetWasmString(data, c); if (symbols_url) - return FileSpec(symbols_url->GetStringRef()); + return FileSpec(*symbols_url); } } return std::nullopt; diff --git a/lldb/source/Symbol/ObjectFile.cpp b/lldb/source/Symbol/ObjectFile.cpp index 21daf7476b522..7efce2a035505 100644 --- a/lldb/source/Symbol/ObjectFile.cpp +++ b/lldb/source/Symbol/ObjectFile.cpp @@ -379,6 +379,7 @@ AddressClass ObjectFile::GetAddressClass(addr_t file_addr) { case eSectionTypeELFDynamicSymbols: case eSectionTypeELFRelocationEntries: case eSectionTypeELFDynamicLinkInfo: + case eSectionTypeWasmName: case eSectionTypeOther: return AddressClass::eUnknown; case eSectionTypeAbsoluteAddress: diff --git a/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml new file mode 100644 index 0000000000000..165bb53662f40 --- /dev/null +++ b/lldb/test/Shell/Symtab/Inputs/simple.wasm.yaml @@ -0,0 +1,210 @@ +--- !WASM +FileHeader: + Version: 0x1 +Sections: + - Type: TYPE + Signatures: + - Index: 0 + ParamTypes: [] + ReturnTypes: [] + - Index: 1 + ParamTypes: + - I32 + - I32 + ReturnTypes: + - I32 + - Index: 2 + ParamTypes: [] + ReturnTypes: + - I32 + - Type: FUNCTION + FunctionTypes: [ 0, 1, 2, 1 ] + - Type: TABLE + Tables: + - Index: 0 + ElemType: FUNCREF + Limits: + Flags: [ HAS_MAX ] + Minimum: 0x1 + Maximum: 0x1 + - Type: MEMORY + Memories: + - Minimum: 0x2 + - Type: GLOBAL + Globals: + - Index: 0 + Type: I32 + Mutable: true + InitExpr: + Opcode: I32_CONST + Value: 66560 + - Index: 1 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1024 + - Index: 2 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1024 + - Index: 3 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1024 + - Index: 4 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 66560 + - Index: 5 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1024 + - Index: 6 + Type: I32 + Mutable: false + InitExpr: + Opcode: 
I32_CONST + Value: 66560 + - Index: 7 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 131072 + - Index: 8 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 0 + - Index: 9 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 1 + - Index: 10 + Type: I32 + Mutable: false + InitExpr: + Opcode: I32_CONST + Value: 65536 + - Type: EXPORT + Exports: + - Name: memory + Kind: MEMORY + Index: 0 + - Name: __wasm_call_ctors + Kind: FUNCTION + Index: 0 + - Name: add + Kind: FUNCTION + Index: 1 + - Name: __original_main + Kind: FUNCTION + Index: 2 + - Name: main + Kind: FUNCTION + Index: 3 + - Name: __main_void + Kind: FUNCTION + Index: 2 + - Name: __indirect_function_table + Kind: TABLE + Index: 0 + - Name: __dso_handle + Kind: GLOBAL + Index: 1 + - Name: __data_end + Kind: GLOBAL + Index: 2 + - Name: __stack_low + Kind: GLOBAL + Index: 3 + - Name: __stack_high + Kind: GLOBAL + Index: 4 + - Name: __global_base + Kind: GLOBAL + Index: 5 + - Name: __heap_base + Kind: GLOBAL + Index: 6 + - Name: __heap_end + Kind: GLOBAL + Index: 7 + - Name: __memory_base + Kind: GLOBAL + Index: 8 + - Name: __table_base + Kind: GLOBAL + Index: 9 + - Name: __wasm_first_page_end + Kind: GLOBAL + Index: 10 + - Type: CODE + Functions: + - Index: 0 + Locals: [] + Body: 0B + - Index: 1 + Locals: + - Type: I32 + Count: 1 + Body: 23808080800041106B21022002200036020C20022001360208200228020C20022802086A0F0B + - Index: 2 + Locals: + - Type: I32 + Count: 2 + Body: 23808080800041106B210020002480808080002000410036020C2000410136020820004102360204200028020820002802041081808080002101200041106A24808080800020010F0B + - Index: 3 + Locals: [] + Body: 1082808080000F0B + - Type: CUSTOM + Name: name + FunctionNames: + - Index: 0 + Name: __wasm_call_ctors + - Index: 1 + Name: add + - Index: 2 + Name: __original_main + - Index: 3 + Name: main + GlobalNames: + - Index: 0 + Name: __stack_pointer + - Type: CUSTOM + Name: producers + Tools: + - Name: clang + Version: 
'22.0.0git' + - Type: CUSTOM + Name: target_features + Features: + - Prefix: USED + Name: bulk-memory + - Prefix: USED + Name: bulk-memory-opt + - Prefix: USED + Name: call-indirect-overlong + - Prefix: USED + Name: multivalue + - Prefix: USED + Name: mutable-globals + - Prefix: USED + Name: nontrapping-fptoint + - Prefix: USED + Name: reference-types + - Prefix: USED + Name: sign-ext +... diff --git a/lldb/test/Shell/Symtab/symtab-wasm.test b/lldb/test/Shell/Symtab/symtab-wasm.test new file mode 100644 index 0000000000000..fc185cd81a0ec --- /dev/null +++ b/lldb/test/Shell/Symtab/symtab-wasm.test @@ -0,0 +1,7 @@ +# RUN: yaml2obj %S/Inputs/simple.wasm.yaml -o %t.wasm +# RUN: %lldb %t.wasm -o 'image dump symtab' | FileCheck %s + +# CHECK: Code 0x0000000000000002 {{.*}} __wasm_call_ctors +# CHECK: Code 0x0000000000000005 {{.*}} add +# CHECK: Code 0x000000000000002f {{.*}} __original_main +# CHECK: Code 0x000000000000007c {{.*}} main From 2f79ef6a62bd475090a5c4e63c65a73bbe405d9c Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 12 Aug 2025 07:23:08 -0700 Subject: [PATCH 2/2] offset -> function offset --- lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp index 6d0b68d5ca54c..a489b05acfcb4 100644 --- a/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp +++ b/lldb/source/Plugins/ObjectFile/wasm/ObjectFileWasm.cpp @@ -274,7 +274,7 @@ ParseFunctions(SectionSP code_section_sp) { std::optional next_offset = llvm::checkedAddUnsigned(offset, function_size); if (!next_offset) - return llvm::createStringError("function offset overflows uint64_t"); + return llvm::createStringError("function offset overflows uint64_t"); offset = *next_offset; }