@@ -390,6 +390,17 @@ const std::string& GetCode(uint16_t index) {
390
390
return table[index ];
391
391
}
392
392
393
// Building blocks for the text of one generated `<var>_raw` definition.
// Two output shapes exist: a pointer initialised from a C++ raw string
// literal, and a brace-initialised array of numeric code units.
//
// snprintf format that opens the raw-string form of a definition. The two %s
// are filled with the element type name and the variable name, in that order.
+ const char * string_literal_def_template = " static const %s *%s_raw = " ;
394
// Opening text for a one-byte payload: a reinterpret_cast to
// const uint8_t* wrapping a raw string literal (R prefix) that uses the
// custom delimiter JS2C1b732aee.
+ constexpr std::string_view ascii_string_literal_start =
395
+ " reinterpret_cast<const uint8_t*>(R\" JS2C1b732aee(" ;
396
// Opening text for a two-byte payload: same shape as above, but casting to
// const uint16_t* and using the uR (UTF-16 raw string) prefix.
+ constexpr std::string_view utf16_string_literal_start =
397
+ " reinterpret_cast<const uint16_t*>(uR\" JS2C1b732aee(" ;
398
// Closes the raw string literal (matching delimiter), the cast and the
// statement.
// NOTE(review): the payload is copied between start and end without any
// escaping, so a payload containing the closing delimiter sequence would end
// the literal early - confirm inputs can never contain it.
+ constexpr std::string_view string_literal_end = " )JS2C1b732aee\" );" ;
399
+
400
// snprintf format that opens the array form of a definition; the two %s are
// again the element type name and the variable name.
+ const char * array_literal_def_template = " static const %s %s_raw[] = " ;
401
// Text written directly before the first array element.
+ constexpr std::string_view array_literal_start = " {\n " ;
402
// Text written directly after the last array element; closes the initialiser.
+ constexpr std::string_view array_literal_end = " \n };\n\n " ;
403
+
393
404
// Definitions:
394
405
// static const uint8_t fs_raw[] = {
395
406
// ....
@@ -403,38 +414,92 @@ const std::string& GetCode(uint16_t index) {
403
414
//
404
415
// static StaticExternalTwoByteResource
405
416
// internal_cli_table_resource(internal_cli_table_raw, 1234, nullptr);
406
- constexpr std::string_view literal_end = " \n };\n\n " ;
417
+ //
418
+ // If use_string_literals is set, the data is output as C++ raw strings
419
+ // (i.e. R"JS2C1b732aee(...)JS2C1b732aee") rather than as an array. This speeds
420
+ // up compilation for gcc/clang.
407
421
template <typename T>
408
- Fragment GetDefinitionImpl (const std::vector<T >& code, const std::string& var) {
409
- size_t count = code. size ();
410
-
422
+ Fragment GetDefinitionImpl (const std::vector<char >& code,
423
+ const std::string& var,
424
+ bool use_string_literals) {
411
425
constexpr bool is_two_byte = std::is_same_v<T, uint16_t >;
412
426
static_assert (is_two_byte || std::is_same_v<T, char >);
427
+
428
+ size_t count = is_two_byte
429
+ ? simdutf::utf16_length_from_utf8 (code.data (), code.size ())
430
+ : code.size ();
413
431
constexpr size_t unit =
414
432
(is_two_byte ? 5 : 3 ) + 1 ; // 0-65536 or 0-127 and a ","
415
433
constexpr const char * arr_type = is_two_byte ? " uint16_t" : " uint8_t" ;
416
434
constexpr const char * resource_type = is_two_byte
417
435
? " StaticExternalTwoByteResource"
418
436
: " StaticExternalOneByteResource" ;
419
437
420
- size_t def_size = 256 + (count * unit);
438
+ size_t def_size = 512 + (use_string_literals ? code. size () : count * unit);
421
439
Fragment result (def_size, 0 );
422
440
423
441
int cur = snprintf (result.data (),
424
442
def_size,
425
- " static const %s %s_raw[] = {\n " ,
443
+ use_string_literals ? string_literal_def_template
444
+ : array_literal_def_template,
426
445
arr_type,
427
446
var.c_str ());
447
+
428
448
assert (cur != 0 );
429
- for (size_t i = 0 ; i < count; ++i) {
430
- // Avoid using snprintf on large chunks of data because it's much slower.
431
- // It's fine to use it on small amount of data though.
432
- const std::string& str = GetCode (static_cast <uint16_t >(code[i]));
433
- memcpy (result.data () + cur, str.c_str (), str.size ());
434
- cur += str.size ();
449
+
450
+ if (use_string_literals) {
451
+ constexpr std::string_view start_string_view =
452
+ is_two_byte ? utf16_string_literal_start : ascii_string_literal_start;
453
+
454
+ memcpy (result.data () + cur,
455
+ start_string_view.data (),
456
+ start_string_view.size ());
457
+ cur += start_string_view.size ();
458
+
459
+ memcpy (result.data () + cur, code.data (), code.size ());
460
+ cur += code.size ();
461
+
462
+ memcpy (result.data () + cur,
463
+ string_literal_end.data (),
464
+ string_literal_end.size ());
465
+ cur += string_literal_end.size ();
466
+ } else {
467
+ memcpy (result.data () + cur,
468
+ array_literal_start.data (),
469
+ array_literal_start.size ());
470
+ cur += array_literal_start.size ();
471
+
472
+ std::vector<uint16_t > utf16_codepoints;
473
+ const std::vector<T>* codepoints;
474
+ if constexpr (is_two_byte) {
475
+ size_t length = simdutf::utf16_length_from_utf8 (code.data (), code.size ());
476
+ utf16_codepoints.resize (length);
477
+ size_t utf16_count = simdutf::convert_utf8_to_utf16 (
478
+ code.data (),
479
+ code.size (),
480
+ reinterpret_cast <char16_t *>(utf16_codepoints.data ()));
481
+ assert (utf16_count != 0 );
482
+ utf16_codepoints.resize (utf16_count);
483
+ Debug (" static size %zu\n " , utf16_count);
484
+ codepoints = &utf16_codepoints;
485
+ } else {
486
+ codepoints = &code;
487
+ }
488
+
489
+ for (size_t i = 0 ; i < codepoints->size (); ++i) {
490
+ // Avoid using snprintf on large chunks of data because it's much slower.
491
+ // It's fine to use it on small amount of data though.
492
+ const std::string& str = GetCode (static_cast <uint16_t >((*codepoints)[i]));
493
+
494
+ memcpy (result.data () + cur, str.c_str (), str.size ());
495
+ cur += str.size ();
496
+ }
497
+
498
+ memcpy (result.data () + cur,
499
+ array_literal_end.data (),
500
+ array_literal_end.size ());
501
+ cur += array_literal_end.size ();
435
502
}
436
- memcpy (result.data () + cur, literal_end.data (), literal_end.size ());
437
- cur += literal_end.size ();
438
503
439
504
int end_size = snprintf (result.data () + cur,
440
505
result.size () - cur,
@@ -448,30 +513,26 @@ Fragment GetDefinitionImpl(const std::vector<T>& code, const std::string& var) {
448
513
return result;
449
514
}
450
515
451
// Builds the definition Fragment for the source `code`, named after `var`.
//
// The bytes in `code` are checked with simdutf::validate_ascii: pure-ASCII
// input is emitted through GetDefinitionImpl<char>, anything else through
// GetDefinitionImpl<uint16_t>. `use_string_literals` is forwarded unchanged to
// GetDefinitionImpl in both cases.
//
// Parameters:
//   var                  - variable name used in the Debug output and passed
//                          on to GetDefinitionImpl.
//   code                 - raw bytes of the source being embedded.
//   use_string_literals  - forwarded to GetDefinitionImpl.
// Returns the Fragment produced by GetDefinitionImpl.
- Fragment GetDefinition (const std::string& var, const std::vector<char >& code) {
516
+ Fragment GetDefinition (const std::string& var,
517
+ const std::vector<char >& code,
518
+ bool use_string_literals) {
452
519
Debug (" GetDefinition %s, code size %zu " , var.c_str (), code.size ());
453
520
// ASCII-only input can be stored one byte per character; everything else
// takes the two-byte path.
bool is_one_byte = simdutf::validate_ascii (code.data (), code.size ());
454
521
Debug (" with %s\n " , is_one_byte ? " 1-byte chars" : " 2-byte chars" );
455
522
456
523
if (is_one_byte) {
457
524
Debug (" static size %zu\n " , code.size ());
458
- return GetDefinitionImpl (code, var);
525
// The template argument is spelled out because both instantiations now take
// the same std::vector<char> parameter, so it cannot be deduced.
+ return GetDefinitionImpl< char > (code, var, use_string_literals );
459
526
} else {
460
// The removed lines below converted UTF-8 to UTF-16 here before calling
// GetDefinitionImpl; the added call passes the UTF-8 bytes through instead,
// and GetDefinitionImpl<uint16_t> performs the conversion on its array path.
- size_t length = simdutf::utf16_length_from_utf8 (code.data (), code.size ());
461
- std::vector<uint16_t > utf16 (length);
462
- size_t utf16_count = simdutf::convert_utf8_to_utf16 (
463
- code.data (), code.size (), reinterpret_cast <char16_t *>(utf16.data ()));
464
- assert (utf16_count != 0 );
465
- utf16.resize (utf16_count);
466
- Debug (" static size %zu\n " , utf16_count);
467
- return GetDefinitionImpl (utf16, var);
527
+ return GetDefinitionImpl<uint16_t >(code, var, use_string_literals);
468
528
}
469
529
}
470
530
471
531
int AddModule (const std::string& filename,
472
532
Fragments* definitions,
473
533
Fragments* initializers,
474
- Fragments* registrations) {
534
+ Fragments* registrations,
535
+ bool use_string_literals) {
475
536
Debug (" AddModule %s start\n " , filename.c_str ());
476
537
477
538
int error = 0 ;
@@ -486,7 +547,7 @@ int AddModule(const std::string& filename,
486
547
std::string file_id = GetFileId (filename);
487
548
std::string var = GetVariableName (file_id);
488
549
489
- definitions->emplace_back (GetDefinition (var, code));
550
+ definitions->emplace_back (GetDefinition (var, code, use_string_literals ));
490
551
491
552
// Initializers of the BuiltinSourceMap:
492
553
// {"fs", UnionBytes{&fs_resource}},
@@ -603,6 +664,7 @@ std::vector<char> JSONify(const std::vector<char>& code) {
603
664
604
665
int AddGypi (const std::string& var,
605
666
const std::string& filename,
667
+ bool use_string_literals,
606
668
Fragments* definitions) {
607
669
Debug (" AddGypi %s start\n " , filename.c_str ());
608
670
@@ -618,14 +680,16 @@ int AddGypi(const std::string& var,
618
680
assert (var == " config" );
619
681
620
682
std::vector<char > transformed = JSONify (code);
621
- definitions->emplace_back (GetDefinition (var, transformed));
683
+ definitions->emplace_back (
684
+ GetDefinition (var, transformed, use_string_literals));
622
685
return 0 ;
623
686
}
624
687
625
688
int JS2C (const FileList& js_files,
626
689
const FileList& mjs_files,
627
690
const std::string& config,
628
- const std::string& dest) {
691
+ const std::string& dest,
692
+ bool use_string_literals) {
629
693
Fragments defintions;
630
694
defintions.reserve (js_files.size () + mjs_files.size () + 1 );
631
695
Fragments initializers;
@@ -634,21 +698,29 @@ int JS2C(const FileList& js_files,
634
698
registrations.reserve (js_files.size () + mjs_files.size () + 1 );
635
699
636
700
for (const auto & filename : js_files) {
637
- int r = AddModule (filename, &defintions, &initializers, ®istrations);
701
+ int r = AddModule (filename,
702
+ &defintions,
703
+ &initializers,
704
+ ®istrations,
705
+ use_string_literals);
638
706
if (r != 0 ) {
639
707
return r;
640
708
}
641
709
}
642
710
for (const auto & filename : mjs_files) {
643
- int r = AddModule (filename, &defintions, &initializers, ®istrations);
711
+ int r = AddModule (filename,
712
+ &defintions,
713
+ &initializers,
714
+ ®istrations,
715
+ use_string_literals);
644
716
if (r != 0 ) {
645
717
return r;
646
718
}
647
719
}
648
720
649
721
assert (config == " config.gypi" );
650
722
// "config.gypi" -> config_raw.
651
- int r = AddGypi (" config" , config, &defintions);
723
+ int r = AddGypi (" config" , config, use_string_literals, &defintions);
652
724
if (r != 0 ) {
653
725
return r;
654
726
}
@@ -673,6 +745,7 @@ int Main(int argc, char* argv[]) {
673
745
std::vector<std::string> args;
674
746
args.reserve (argc);
675
747
std::string root_dir;
748
+ bool use_string_literals = false ;
676
749
for (int i = 1 ; i < argc; ++i) {
677
750
std::string arg (argv[i]);
678
751
if (arg == " --verbose" ) {
@@ -683,6 +756,8 @@ int Main(int argc, char* argv[]) {
683
756
return 1 ;
684
757
}
685
758
root_dir = argv[++i];
759
+ } else if (arg == " --use-string-literals" ) {
760
+ use_string_literals = true ;
686
761
} else {
687
762
args.emplace_back (argv[i]);
688
763
}
@@ -744,7 +819,8 @@ int Main(int argc, char* argv[]) {
744
819
std::sort (js_it->second .begin (), js_it->second .end ());
745
820
std::sort (mjs_it->second .begin (), mjs_it->second .end ());
746
821
747
- return JS2C (js_it->second , mjs_it->second , config, output);
822
+ return JS2C (
823
+ js_it->second , mjs_it->second , config, output, use_string_literals);
748
824
}
749
825
} // namespace js2c
750
826
} // namespace node
0 commit comments