@@ -390,6 +390,19 @@ const std::string& GetCode(uint16_t index) {
390
390
return table[index ];
391
391
}
392
392
393
// Text fragments used to assemble each generated "<var>_raw" definition.
// GetDefinitionImpl first formats one of the *_def_template strings with the
// element type and the variable name, then appends the matching *_start text,
// the payload, and the matching *_end text.

// Used when use_string_literals = true.
// The payload is wrapped in a C++ raw string literal and cast to a pointer to
// the element type. The delimiter "JS2C1b732aee" must follow R" / uR"
// immediately and be repeated verbatim after the closing parenthesis: a
// raw-string delimiter may not contain spaces, so any stray whitespace here
// would make the generated file ill-formed.
constexpr const char* string_literal_def_template =
    "static const %s *%s_raw = ";
constexpr std::string_view ascii_string_literal_start =
    "reinterpret_cast<const uint8_t*>(R\"JS2C1b732aee(";
constexpr std::string_view utf16_string_literal_start =
    "reinterpret_cast<const uint16_t*>(uR\"JS2C1b732aee(";
constexpr std::string_view string_literal_end = ")JS2C1b732aee\");";

// Used when use_string_literals = false.
// The payload is written as a brace-enclosed list of numeric code units.
constexpr const char* array_literal_def_template =
    "static const %s %s_raw[] = ";
constexpr std::string_view array_literal_start = "{\n";
constexpr std::string_view array_literal_end = "\n};\n\n";
405
+
393
406
// Definitions:
394
407
// static const uint8_t fs_raw[] = {
395
408
// ....
@@ -403,38 +416,93 @@ const std::string& GetCode(uint16_t index) {
403
416
//
404
417
// static StaticExternalTwoByteResource
405
418
// internal_cli_table_resource(internal_cli_table_raw, 1234, nullptr);
406
- constexpr std::string_view literal_end = " \n };\n\n " ;
419
+ //
420
+ // If use_string_literals is set, the data is output as C++ raw strings
421
+ // (i.e. R"JS2C1b732aee(...)JS2C1b732aee") rather than as an array. This speeds
422
+ // up compilation for gcc/clang.
407
423
template <typename T>
408
- Fragment GetDefinitionImpl (const std::vector<T >& code, const std::string& var) {
409
- size_t count = code. size ();
410
-
424
+ Fragment GetDefinitionImpl (const std::vector<char >& code,
425
+ const std::string& var,
426
+ bool use_string_literals) {
411
427
constexpr bool is_two_byte = std::is_same_v<T, uint16_t >;
412
428
static_assert (is_two_byte || std::is_same_v<T, char >);
429
+
430
+ size_t count = is_two_byte
431
+ ? simdutf::utf16_length_from_utf8 (code.data (), code.size ())
432
+ : code.size ();
413
433
constexpr size_t unit =
414
434
(is_two_byte ? 5 : 3 ) + 1 ; // 0-65536 or 0-127 and a ","
415
435
constexpr const char * arr_type = is_two_byte ? " uint16_t" : " uint8_t" ;
416
436
constexpr const char * resource_type = is_two_byte
417
437
? " StaticExternalTwoByteResource"
418
438
: " StaticExternalOneByteResource" ;
419
439
420
- size_t def_size = 256 + (count * unit);
440
+ size_t def_size = 512 + (use_string_literals ? code. size () : count * unit);
421
441
Fragment result (def_size, 0 );
422
442
423
443
int cur = snprintf (result.data (),
424
444
def_size,
425
- " static const %s %s_raw[] = {\n " ,
445
+ use_string_literals ? string_literal_def_template
446
+ : array_literal_def_template,
426
447
arr_type,
427
448
var.c_str ());
449
+
428
450
assert (cur != 0 );
429
- for (size_t i = 0 ; i < count; ++i) {
430
- // Avoid using snprintf on large chunks of data because it's much slower.
431
- // It's fine to use it on small amount of data though.
432
- const std::string& str = GetCode (static_cast <uint16_t >(code[i]));
433
- memcpy (result.data () + cur, str.c_str (), str.size ());
434
- cur += str.size ();
451
+
452
+ if (use_string_literals) {
453
+ constexpr std::string_view start_string_view =
454
+ is_two_byte ? utf16_string_literal_start : ascii_string_literal_start;
455
+
456
+ memcpy (result.data () + cur,
457
+ start_string_view.data (),
458
+ start_string_view.size ());
459
+ cur += start_string_view.size ();
460
+
461
+ memcpy (result.data () + cur, code.data (), code.size ());
462
+ cur += code.size ();
463
+
464
+ memcpy (result.data () + cur,
465
+ string_literal_end.data (),
466
+ string_literal_end.size ());
467
+ cur += string_literal_end.size ();
468
+ } else {
469
+ memcpy (result.data () + cur,
470
+ array_literal_start.data (),
471
+ array_literal_start.size ());
472
+ cur += array_literal_start.size ();
473
+
474
+ const std::vector<T>* codepoints;
475
+
476
+ std::vector<uint16_t > utf16_codepoints;
477
+ if constexpr (is_two_byte) {
478
+ utf16_codepoints.resize (count);
479
+ size_t utf16_count = simdutf::convert_utf8_to_utf16 (
480
+ code.data (),
481
+ code.size (),
482
+ reinterpret_cast <char16_t *>(utf16_codepoints.data ()));
483
+ assert (utf16_count != 0 );
484
+ utf16_codepoints.resize (utf16_count);
485
+ Debug (" static size %zu\n " , utf16_count);
486
+ codepoints = &utf16_codepoints;
487
+ } else {
488
+ // The code is ASCII, so no need to translate.
489
+ codepoints = &code;
490
+ }
491
+
492
+ for (size_t i = 0 ; i < codepoints->size (); ++i) {
493
+ // Avoid using snprintf on large chunks of data because it's much slower.
494
+ // It's fine to use it on small amount of data though.
495
+ const std::string& str = GetCode (static_cast <uint16_t >((*codepoints)[i]));
496
+
497
+ memcpy (result.data () + cur, str.c_str (), str.size ());
498
+ cur += str.size ();
499
+ }
500
+
501
+ memcpy (result.data () + cur,
502
+ array_literal_end.data (),
503
+ array_literal_end.size ());
504
+ cur += array_literal_end.size ();
435
505
}
436
- memcpy (result.data () + cur, literal_end.data (), literal_end.size ());
437
- cur += literal_end.size ();
438
506
439
507
int end_size = snprintf (result.data () + cur,
440
508
result.size () - cur,
@@ -448,30 +516,26 @@ Fragment GetDefinitionImpl(const std::vector<T>& code, const std::string& var) {
448
516
return result;
449
517
}
450
518
451
- Fragment GetDefinition (const std::string& var, const std::vector<char >& code) {
519
+ Fragment GetDefinition (const std::string& var,
520
+ const std::vector<char >& code,
521
+ bool use_string_literals) {
452
522
Debug (" GetDefinition %s, code size %zu " , var.c_str (), code.size ());
453
523
bool is_one_byte = simdutf::validate_ascii (code.data (), code.size ());
454
524
Debug (" with %s\n " , is_one_byte ? " 1-byte chars" : " 2-byte chars" );
455
525
456
526
if (is_one_byte) {
457
527
Debug (" static size %zu\n " , code.size ());
458
- return GetDefinitionImpl (code, var);
528
+ return GetDefinitionImpl< char > (code, var, use_string_literals );
459
529
} else {
460
- size_t length = simdutf::utf16_length_from_utf8 (code.data (), code.size ());
461
- std::vector<uint16_t > utf16 (length);
462
- size_t utf16_count = simdutf::convert_utf8_to_utf16 (
463
- code.data (), code.size (), reinterpret_cast <char16_t *>(utf16.data ()));
464
- assert (utf16_count != 0 );
465
- utf16.resize (utf16_count);
466
- Debug (" static size %zu\n " , utf16_count);
467
- return GetDefinitionImpl (utf16, var);
530
+ return GetDefinitionImpl<uint16_t >(code, var, use_string_literals);
468
531
}
469
532
}
470
533
471
534
int AddModule (const std::string& filename,
472
535
Fragments* definitions,
473
536
Fragments* initializers,
474
- Fragments* registrations) {
537
+ Fragments* registrations,
538
+ bool use_string_literals) {
475
539
Debug (" AddModule %s start\n " , filename.c_str ());
476
540
477
541
int error = 0 ;
@@ -486,7 +550,7 @@ int AddModule(const std::string& filename,
486
550
std::string file_id = GetFileId (filename);
487
551
std::string var = GetVariableName (file_id);
488
552
489
- definitions->emplace_back (GetDefinition (var, code));
553
+ definitions->emplace_back (GetDefinition (var, code, use_string_literals ));
490
554
491
555
// Initializers of the BuiltinSourceMap:
492
556
// {"fs", UnionBytes{&fs_resource}},
@@ -603,6 +667,7 @@ std::vector<char> JSONify(const std::vector<char>& code) {
603
667
604
668
int AddGypi (const std::string& var,
605
669
const std::string& filename,
670
+ bool use_string_literals,
606
671
Fragments* definitions) {
607
672
Debug (" AddGypi %s start\n " , filename.c_str ());
608
673
@@ -618,14 +683,16 @@ int AddGypi(const std::string& var,
618
683
assert (var == " config" );
619
684
620
685
std::vector<char > transformed = JSONify (code);
621
- definitions->emplace_back (GetDefinition (var, transformed));
686
+ definitions->emplace_back (
687
+ GetDefinition (var, transformed, use_string_literals));
622
688
return 0 ;
623
689
}
624
690
625
691
int JS2C (const FileList& js_files,
626
692
const FileList& mjs_files,
627
693
const std::string& config,
628
- const std::string& dest) {
694
+ const std::string& dest,
695
+ bool use_string_literals) {
629
696
Fragments defintions;
630
697
defintions.reserve (js_files.size () + mjs_files.size () + 1 );
631
698
Fragments initializers;
@@ -634,21 +701,29 @@ int JS2C(const FileList& js_files,
634
701
registrations.reserve (js_files.size () + mjs_files.size () + 1 );
635
702
636
703
for (const auto & filename : js_files) {
637
- int r = AddModule (filename, &defintions, &initializers, &registrations);
704
+ int r = AddModule (filename,
705
+ &defintions,
706
+ &initializers,
707
+ &registrations,
708
+ use_string_literals);
638
709
if (r != 0 ) {
639
710
return r;
640
711
}
641
712
}
642
713
for (const auto & filename : mjs_files) {
643
- int r = AddModule (filename, &defintions, &initializers, &registrations);
714
+ int r = AddModule (filename,
715
+ &defintions,
716
+ &initializers,
717
+ &registrations,
718
+ use_string_literals);
644
719
if (r != 0 ) {
645
720
return r;
646
721
}
647
722
}
648
723
649
724
assert (config == " config.gypi" );
650
725
// "config.gypi" -> config_raw.
651
- int r = AddGypi (" config" , config, &defintions);
726
+ int r = AddGypi (" config" , config, use_string_literals, &defintions);
652
727
if (r != 0 ) {
653
728
return r;
654
729
}
@@ -673,6 +748,7 @@ int Main(int argc, char* argv[]) {
673
748
std::vector<std::string> args;
674
749
args.reserve (argc);
675
750
std::string root_dir;
751
+ bool use_string_literals = false ;
676
752
for (int i = 1 ; i < argc; ++i) {
677
753
std::string arg (argv[i]);
678
754
if (arg == " --verbose" ) {
@@ -683,6 +759,8 @@ int Main(int argc, char* argv[]) {
683
759
return 1 ;
684
760
}
685
761
root_dir = argv[++i];
762
+ } else if (arg == " --use-string-literals" ) {
763
+ use_string_literals = true ;
686
764
} else {
687
765
args.emplace_back (argv[i]);
688
766
}
@@ -744,7 +822,8 @@ int Main(int argc, char* argv[]) {
744
822
std::sort (js_it->second .begin (), js_it->second .end ());
745
823
std::sort (mjs_it->second .begin (), mjs_it->second .end ());
746
824
747
- return JS2C (js_it->second , mjs_it->second , config, output);
825
+ return JS2C (
826
+ js_it->second , mjs_it->second , config, output, use_string_literals);
748
827
}
749
828
} // namespace js2c
750
829
} // namespace node
0 commit comments