22
22
#include " llvm/ADT/StringRef.h"
23
23
#include " llvm/BinaryFormat/Magic.h"
24
24
#include " llvm/BinaryFormat/Wasm.h"
25
+ #include " llvm/Support/CheckedArithmetic.h"
25
26
#include " llvm/Support/Endian.h"
26
27
#include " llvm/Support/Format.h"
27
28
#include < optional>
@@ -50,7 +51,8 @@ static bool ValidateModuleHeader(const DataBufferSP &data_sp) {
50
51
return version == llvm::wasm::WasmVersion;
51
52
}
52
53
53
- static std::optional<ConstString>
54
+ // FIXME: Use lldb::DataExtractor instead of llvm::DataExtractor.
55
+ static std::optional<std::string>
54
56
GetWasmString (llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
55
57
// A Wasm string is encoded as a vector of UTF-8 codes.
56
58
// Vectors are encoded with their u32 length followed by the element
@@ -61,7 +63,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
61
63
return std::nullopt;
62
64
}
63
65
64
- if (len >= ( uint64_t ( 1 ) << 32 )) {
66
+ if (len > std::numeric_limits< uint32_t >:: max ( )) {
65
67
return std::nullopt;
66
68
}
67
69
@@ -72,8 +74,7 @@ GetWasmString(llvm::DataExtractor &data, llvm::DataExtractor::Cursor &c) {
72
74
return std::nullopt;
73
75
}
74
76
75
- llvm::StringRef str = toStringRef (llvm::ArrayRef (str_storage));
76
- return ConstString (str);
77
+ return std::string (toStringRef (llvm::ArrayRef (str_storage)));
77
78
}
78
79
79
80
char ObjectFileWasm::ID;
@@ -174,15 +175,15 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
174
175
if (!c)
175
176
return !llvm::errorToBool (c.takeError ());
176
177
177
- if (payload_len >= ( uint64_t ( 1 ) << 32 ))
178
+ if (payload_len > std::numeric_limits< uint32_t >:: max ( ))
178
179
return false ;
179
180
180
181
if (section_id == llvm::wasm::WASM_SEC_CUSTOM) {
181
182
// Custom sections have the id 0. Their contents consist of a name
182
183
// identifying the custom section, followed by an uninterpreted sequence
183
184
// of bytes.
184
185
lldb::offset_t prev_offset = c.tell ();
185
- std::optional<ConstString > sect_name = GetWasmString (data, c);
186
+ std::optional<std::string > sect_name = GetWasmString (data, c);
186
187
if (!sect_name)
187
188
return false ;
188
189
@@ -191,7 +192,7 @@ bool ObjectFileWasm::DecodeNextSection(lldb::offset_t *offset_ptr) {
191
192
192
193
uint32_t section_length = payload_len - (c.tell () - prev_offset);
193
194
m_sect_infos.push_back (section_info{*offset_ptr + c.tell (), section_length,
194
- section_id, *sect_name});
195
+ section_id, ConstString ( *sect_name) });
195
196
*offset_ptr += (c.tell () + section_length);
196
197
} else if (section_id <= llvm::wasm::WASM_SEC_LAST_KNOWN) {
197
198
m_sect_infos.push_back (section_info{*offset_ptr + c.tell (),
@@ -248,12 +249,203 @@ bool ObjectFileWasm::ParseHeader() {
248
249
return true ;
249
250
}
250
251
251
- void ObjectFileWasm::ParseSymtab (Symtab &symtab) {}
252
+ static llvm::Expected<std::vector<AddressRange>>
253
+ ParseFunctions (SectionSP code_section_sp) {
254
+ DataExtractor data;
255
+ code_section_sp->GetSectionData (data);
256
+ lldb::offset_t offset = 0 ;
257
+
258
+ const uint64_t function_count = data.GetULEB128 (&offset);
259
+ if (function_count > std::numeric_limits<uint32_t >::max ())
260
+ return llvm::createStringError (" function count overflows uint32_t" );
261
+
262
+ std::vector<AddressRange> functions;
263
+ functions.reserve (function_count);
264
+
265
+ for (uint32_t i = 0 ; i < function_count; ++i) {
266
+ const uint64_t function_size = data.GetULEB128 (&offset);
267
+ if (function_size > std::numeric_limits<uint32_t >::max ())
268
+ return llvm::createStringError (" function size overflows uint32_t" );
269
+ // llvm-objdump considers the ULEB with the function size to be part of the
270
+ // function. We can't do that here because that would break symbolic
271
+ // breakpoints, as that address is never executed.
272
+ functions.emplace_back (code_section_sp, offset, function_size);
273
+
274
+ std::optional<lldb::offset_t > next_offset =
275
+ llvm::checkedAddUnsigned (offset, function_size);
276
+ if (!next_offset)
277
+ return llvm::createStringError (" function offset overflows uint64_t" );
278
+ offset = *next_offset;
279
+ }
280
+
281
+ return functions;
282
+ }
283
+
284
+ static llvm::Expected<std::vector<AddressRange>>
285
+ ParseData (SectionSP data_section_sp) {
286
+ DataExtractor data;
287
+ data_section_sp->GetSectionData (data);
288
+
289
+ lldb::offset_t offset = 0 ;
290
+
291
+ const uint64_t segment_count = data.GetULEB128 (&offset);
292
+ if (segment_count > std::numeric_limits<uint32_t >::max ())
293
+ return llvm::createStringError (" segment count overflows uint32_t" );
294
+
295
+ std::vector<AddressRange> segments;
296
+ segments.reserve (segment_count);
297
+
298
+ for (uint32_t i = 0 ; i < segment_count; ++i) {
299
+ const uint64_t flags = data.GetULEB128 (&offset);
300
+ if (flags > std::numeric_limits<uint32_t >::max ())
301
+ return llvm::createStringError (" segment flags overflows uint32_t" );
302
+
303
+ const uint64_t segment_size = data.GetULEB128 (&offset);
304
+ if (flags > std::numeric_limits<uint32_t >::max ())
305
+ return llvm::createStringError (" segment size overflows uint32_t" );
306
+
307
+ segments.emplace_back (data_section_sp, offset, segment_size);
308
+
309
+ std::optional<lldb::offset_t > next_offset =
310
+ llvm::checkedAddUnsigned (offset, segment_size);
311
+ if (!next_offset)
312
+ return llvm::createStringError (" segment offset overflows uint64_t" );
313
+ offset = *next_offset;
314
+ }
315
+
316
+ return segments;
317
+ }
318
+
319
+ static llvm::Expected<std::vector<Symbol>>
320
+ ParseNames (SectionSP name_section_sp,
321
+ const std::vector<AddressRange> &function_ranges,
322
+ const std::vector<AddressRange> &segment_ranges) {
323
+ DataExtractor name_section_data;
324
+ name_section_sp->GetSectionData (name_section_data);
325
+
326
+ llvm::DataExtractor data = name_section_data.GetAsLLVM ();
327
+ llvm::DataExtractor::Cursor c (0 );
328
+ std::vector<Symbol> symbols;
329
+ while (c && c.tell () < data.size ()) {
330
+ const uint8_t type = data.getU8 (c);
331
+ const uint64_t size = data.getULEB128 (c);
332
+ if (size > std::numeric_limits<uint32_t >::max ())
333
+ return llvm::createStringError (" size overflows uint32_t" );
334
+
335
+ switch (type) {
336
+ case llvm::wasm::WASM_NAMES_FUNCTION: {
337
+ const uint64_t count = data.getULEB128 (c);
338
+ if (count > std::numeric_limits<uint32_t >::max ())
339
+ return llvm::createStringError (" function count overflows uint32_t" );
340
+
341
+ for (uint64_t i = 0 ; c && i < count; ++i) {
342
+ const uint64_t idx = data.getULEB128 (c);
343
+ const std::optional<std::string> name = GetWasmString (data, c);
344
+ if (!name || idx >= function_ranges.size ())
345
+ continue ;
346
+ symbols.emplace_back (
347
+ symbols.size (), Mangled (*name), lldb::eSymbolTypeCode,
348
+ /* external=*/ false , /* is_debug=*/ false , /* is_trampoline=*/ false ,
349
+ /* is_artificial=*/ false , function_ranges[idx],
350
+ /* size_is_valid=*/ true , /* contains_linker_annotations=*/ false ,
351
+ /* flags=*/ 0 );
352
+ }
353
+ } break ;
354
+ case llvm::wasm::WASM_NAMES_DATA_SEGMENT: {
355
+ const uint64_t count = data.getULEB128 (c);
356
+ if (count > std::numeric_limits<uint32_t >::max ())
357
+ return llvm::createStringError (" data count overflows uint32_t" );
358
+ for (uint64_t i = 0 ; c && i < count; ++i) {
359
+ const uint64_t idx = data.getULEB128 (c);
360
+ const std::optional<std::string> name = GetWasmString (data, c);
361
+ if (!name || idx >= segment_ranges.size ())
362
+ continue ;
363
+ symbols.emplace_back (
364
+ symbols.size (), Mangled (*name), lldb::eSymbolTypeData,
365
+ /* external=*/ false , /* is_debug=*/ false , /* is_trampoline=*/ false ,
366
+ /* is_artificial=*/ false , segment_ranges[idx],
367
+ /* size_is_valid=*/ true , /* contains_linker_annotations=*/ false ,
368
+ /* flags=*/ 0 );
369
+ }
370
+
371
+ } break ;
372
+ case llvm::wasm::WASM_NAMES_GLOBAL:
373
+ case llvm::wasm::WASM_NAMES_LOCAL:
374
+ default :
375
+ std::optional<uint64_t > offset = llvm::checkedAddUnsigned (c.tell (), size);
376
+ if (!offset)
377
+ return llvm::createStringError (" offset overflows uint64_t" );
378
+ c.seek (*offset);
379
+ }
380
+ }
381
+
382
+ if (!c)
383
+ return c.takeError ();
384
+
385
+ return symbols;
386
+ }
387
+
388
+ void ObjectFileWasm::ParseSymtab (Symtab &symtab) {
389
+ assert (m_sections_up && " sections must be parsed" );
390
+ Log *log = GetLog (LLDBLog::Object);
391
+
392
+ // The name section contains names and indexes. First parse the data from the
393
+ // relevant sections so we can access it by its index.
394
+ std::vector<AddressRange> function_ranges;
395
+ std::vector<AddressRange> segment_ranges;
396
+
397
+ // Parse the code section.
398
+ if (SectionSP code_section_sp =
399
+ m_sections_up->FindSectionByType (lldb::eSectionTypeCode, false )) {
400
+ llvm::Expected<std::vector<AddressRange>> functions =
401
+ ParseFunctions (code_section_sp);
402
+ if (!functions) {
403
+ LLDB_LOG_ERROR (log, functions.takeError (),
404
+ " Failed to parse Wasm code section: {0}" );
405
+ return ;
406
+ }
407
+ function_ranges = *functions;
408
+ }
409
+
410
+ // Parse the data section.
411
+ if (SectionSP data_section_sp =
412
+ m_sections_up->FindSectionByType (lldb::eSectionTypeData, false )) {
413
+ llvm::Expected<std::vector<AddressRange>> segments =
414
+ ParseData (data_section_sp);
415
+ if (!segments) {
416
+ LLDB_LOG_ERROR (log, segments.takeError (),
417
+ " Failed to parse Wasm data section: {0}" );
418
+ return ;
419
+ }
420
+ segment_ranges = *segments;
421
+ }
422
+
423
+ // Parse the name section.
424
+ SectionSP name_section_sp =
425
+ m_sections_up->FindSectionByType (lldb::eSectionTypeWasmName, false );
426
+ if (!name_section_sp) {
427
+ LLDB_LOG (log, " Failed to parse Wasm symbol table: no names section" );
428
+ return ;
429
+ }
430
+
431
+ llvm::Expected<std::vector<Symbol>> symbols =
432
+ ParseNames (name_section_sp, function_ranges, segment_ranges);
433
+ if (!symbols) {
434
+ LLDB_LOG_ERROR (log, symbols.takeError (), " Failed to parse Wasm names: {0}" );
435
+ return ;
436
+ }
437
+
438
+ for (const Symbol &symbol : *symbols)
439
+ symtab.AddSymbol (symbol);
440
+
441
+ symtab.Finalize ();
442
+ }
252
443
253
444
// Map a Wasm custom section name to an LLDB section type.
//
// "name" maps to the Wasm name section type. Names starting with ".debug_" or
// ".zdebug_" have that prefix stripped and are resolved through
// ObjectFile::GetDWARFSectionTypeFromName. Everything else is
// eSectionTypeOther.
static SectionType GetSectionTypeFromName(llvm::StringRef Name) {
  if (Name == "name")
    return lldb::eSectionTypeWasmName;

  // consume_front drops the matched prefix from Name, so the DWARF lookup
  // below only sees the remainder.
  const bool has_dwarf_prefix =
      Name.consume_front(".debug_") || Name.consume_front(".zdebug_");
  return has_dwarf_prefix ? ObjectFile::GetDWARFSectionTypeFromName(Name)
                          : eSectionTypeOther;
}
259
451
@@ -283,6 +475,9 @@ void ObjectFileWasm::CreateSections(SectionList &unified_section_list) {
283
475
// For this reason Section::GetFileAddress() must return zero for the
284
476
// Code section.
285
477
vm_addr = 0 ;
478
+ } else if (llvm::wasm::WASM_SEC_DATA == sect_info.id ) {
479
+ section_type = eSectionTypeData;
480
+ section_name = ConstString (" data" );
286
481
} else {
287
482
section_type = GetSectionTypeFromName (sect_info.name .GetStringRef ());
288
483
if (section_type == eSectionTypeOther)
@@ -397,9 +592,9 @@ std::optional<FileSpec> ObjectFileWasm::GetExternalDebugInfoFileSpec() {
397
592
ReadImageData (sect_info.offset , kBufferSize );
398
593
llvm::DataExtractor data = section_header_data.GetAsLLVM ();
399
594
llvm::DataExtractor::Cursor c (0 );
400
- std::optional<ConstString > symbols_url = GetWasmString (data, c);
595
+ std::optional<std::string > symbols_url = GetWasmString (data, c);
401
596
if (symbols_url)
402
- return FileSpec (symbols_url-> GetStringRef () );
597
+ return FileSpec (* symbols_url);
403
598
}
404
599
}
405
600
return std::nullopt;
0 commit comments