@@ -564,125 +564,149 @@ fn ReadConsoleWithUtf16ToUtf8Conversion(hConsoleInput: HANDLE, buffer: []u8) Rea
564
564
error .ConsoleHandleLimitReached = > @panic ("Reached maximum number of 64 console handles." ),
565
565
else = > return error .Unexpected ,
566
566
};
567
- // The temporary buffer can be huge, so keep it away from stack
568
- var heap_allocator : std.heap.HeapAllocator = std .heap .HeapAllocator .init ();
569
- defer heap_allocator .deinit ();
570
- const allocator : std.mem.Allocator = heap_allocator .allocator ();
571
- var temp_buffer : []u8 = allocator .alloc (u8 , buffer .len ) catch @panic ("Out of memory." );
572
- defer allocator .free (temp_buffer );
573
-
567
+ var temp_buffer : [1024 ]u8 = undefined ;
574
568
var bytes_read : DWORD = 0 ;
575
569
var reached_end_of_line : bool = false ;
576
-
577
- // Try flushing leftover UTF-8 bytes first (one codepoint at most)
578
- if (handle_data .utf8_buffer .bytes_used != 0 ) {
579
- // LF will only appear at the first byte and there will be only one byte in the buffer
580
- if (handle_data .utf8_buffer .data [0 ] == 0x0A ) {
581
- assert (handle_data .utf8_buffer .bytes_used == 1 );
582
- reached_end_of_line = true ;
583
- }
584
- // Is there enough space for all bytes in UTF-8 buffer?
585
- const has_enough_space : bool = buffer .len >= handle_data .utf8_buffer .bytes_used ;
586
- const max_bytes_to_read : usize = if (has_enough_space ) handle_data .utf8_buffer .bytes_used else buffer .len ;
587
- for (0.. max_bytes_to_read ) | index | {
588
- temp_buffer [index ] = handle_data .utf8_buffer .data [handle_data .utf8_buffer .front_index ];
589
- // Front index wraps around in the case of 4-byte sequence (non-BMP code point)
590
- handle_data .utf8_buffer .front_index +%= 1 ;
591
- }
592
- bytes_read += @truncate (max_bytes_to_read );
593
- handle_data .utf8_buffer .bytes_used -= @truncate (max_bytes_to_read );
594
- if (has_enough_space ) {
595
- // UTF-8 buffer is now empty, we can safely reset front_index to zero
596
- handle_data .utf8_buffer .front_index = 0 ;
597
- } else {
598
- return ReadConsoleProcessUtf8Buffer (buffer , temp_buffer , bytes_read , false );
599
- }
600
- // LF ends a console read immediately
601
- if (reached_end_of_line ) {
602
- return ReadConsoleProcessUtf8Buffer (buffer , temp_buffer , bytes_read , false );
603
- }
604
- }
605
- assert (handle_data .utf8_buffer .front_index == 0 );
570
+ const TruncateState = enum {
571
+ do_not_truncate ,
572
+ truncate_after_SUB ,
573
+ truncate_all ,
574
+ };
575
+ var truncate_state : TruncateState = .do_not_truncate ;
606
576
while (bytes_read < buffer .len ) {
607
- // Read only one code unit each loop
608
- var utf16_code_unit : u16 = undefined ;
609
- var utf16_code_units_read : DWORD = undefined ;
610
- if (kernel32 .ReadConsoleW (hConsoleInput , & utf16_code_unit , 1 , & utf16_code_units_read , null ) == FALSE ) {
611
- switch (kernel32 .GetLastError ()) {
612
- .INVALID_HANDLE = > return error .NotOpenForReading ,
613
- else = > | err | return unexpectedError (err ),
614
- }
577
+ const remaining_buffer : []u8 = buffer [bytes_read .. buffer .len ];
578
+ var has_enough_space_in_remaining_buffer : bool = undefined ;
579
+ var bytes_read_into_temp_buffer : DWORD = 0 ;
580
+ var truncate_index : DWORD = undefined ;
581
+ // If a SUB character is encountered in a previous loop, truncate everything in this loop
582
+ if (truncate_state == .truncate_after_SUB ) {
583
+ truncate_state = .truncate_all ;
615
584
}
616
- if (utf16_code_unit == 0x000D ) {
617
- // CR should always be followed by an LF, so just discard it
618
- continue ;
619
- } else if (utf16_code_unit >= 0xD800 and utf16_code_unit <= 0xDBFF ) {
620
- // When a high surrogate is encountered, store it into the UTF-16 buffer
621
- assert (handle_data .utf16_buffer .code_units_used == 0 );
622
- handle_data .utf16_buffer .data [0 ] = utf16_code_unit ;
623
- handle_data .utf16_buffer .code_units_used = 1 ;
624
- continue ;
625
- } else if (utf16_code_unit >= 0xDC00 and utf16_code_unit <= 0xDFFF ) {
626
- // When a low surrogate is encountered, assemble surrogate pair and convert to UTF-8
627
- if (! (utf16_code_units_read == 1 and
628
- handle_data .utf16_buffer .data [0 ] >= 0xD800 and handle_data .utf16_buffer .data [0 ] <= 0xDBFF )) {
629
- unreachable ;
585
+ // Try flushing leftover UTF-8 bytes first (one codepoint at most)
586
+ if (handle_data .utf8_buffer .bytes_used != 0 ) {
587
+ if (handle_data .utf8_buffer .data [0 ] == 0x0A ) {
588
+ assert (handle_data .utf8_buffer .bytes_used == 1 );
589
+ reached_end_of_line = true ;
590
+ } else if (handle_data .utf8_buffer .data [0 ] == 0x1A ) {
591
+ assert (handle_data .utf8_buffer .bytes_used == 1 );
592
+ // Truncate after SUB character in this loop if we never truncated in previous loops
593
+ if (truncate_state == .do_not_truncate ) {
594
+ truncate_state = .truncate_after_SUB ;
595
+ truncate_index = 1 ;
596
+ }
597
+ }
598
+ // Is there enough space for all bytes in UTF-8 buffer?
599
+ const has_enough_space : bool = remaining_buffer .len >= handle_data .utf8_buffer .bytes_used ;
600
+ const max_bytes_to_read : usize = if (has_enough_space ) handle_data .utf8_buffer .bytes_used else remaining_buffer .len ;
601
+ for (0.. max_bytes_to_read ) | index | {
602
+ temp_buffer [index ] = handle_data .utf8_buffer .data [handle_data .utf8_buffer .front_index ];
603
+ // Front index wraps around in the case of 4-byte sequence (non-BMP code point)
604
+ handle_data .utf8_buffer .front_index +%= 1 ;
605
+ }
606
+ bytes_read_into_temp_buffer += @truncate (max_bytes_to_read );
607
+ handle_data .utf8_buffer .bytes_used -= @truncate (max_bytes_to_read );
608
+ if (has_enough_space ) {
609
+ // UTF-8 buffer is now empty, we can safely reset front_index to zero
610
+ handle_data .utf8_buffer .front_index = 0 ;
611
+ } else {
612
+ switch (truncate_state ) {
613
+ .truncate_all = > {},
614
+ else = > @memcpy (remaining_buffer [0.. bytes_read_into_temp_buffer ], temp_buffer [0.. bytes_read_into_temp_buffer ]),
615
+ }
616
+ bytes_read += bytes_read_into_temp_buffer ;
617
+ break ;
630
618
}
631
- handle_data .utf16_buffer .data [1 ] = utf16_code_unit ;
632
- handle_data .utf16_buffer .code_units_used = 0 ;
633
- const utf8_bytes : usize = std .unicode .utf16leToUtf8 (& handle_data .utf8_buffer .data , & handle_data .utf16_buffer .data ) catch return error .Unexpected ;
634
- assert (utf8_bytes == 4 );
635
- handle_data .utf8_buffer .bytes_used = 4 ;
636
- } else {
637
- assert (handle_data .utf16_buffer .code_units_used == 0 );
638
- const utf8_bytes : usize = std .unicode .utf16leToUtf8 (& handle_data .utf8_buffer .data , @as (* [1 ]u16 , & utf16_code_unit )) catch return error .Unexpected ;
639
- handle_data .utf8_buffer .bytes_used = @truncate (utf8_bytes );
640
619
// LF ends a console read immediately
641
- if (handle_data .utf8_buffer .bytes_used == 1 and handle_data .utf8_buffer .data [0 ] == 0x0A ) {
642
- reached_end_of_line = true ;
620
+ if (reached_end_of_line ) {
621
+ switch (truncate_state ) {
622
+ .truncate_all = > {},
623
+ else = > @memcpy (remaining_buffer [0.. bytes_read_into_temp_buffer ], temp_buffer [0.. bytes_read_into_temp_buffer ]),
624
+ }
625
+ bytes_read += bytes_read_into_temp_buffer ;
626
+ break ;
643
627
}
644
628
}
645
- // Is there enough space for all bytes in UTF-8 buffer?
646
- const has_enough_space : bool = buffer .len >= bytes_read + handle_data .utf8_buffer .bytes_used ;
647
- const max_bytes_to_read : usize = if (has_enough_space ) handle_data .utf8_buffer .bytes_used else buffer .len - bytes_read ;
648
- for (0.. max_bytes_to_read ) | index | {
649
- temp_buffer [bytes_read + index ] = handle_data .utf8_buffer .data [handle_data .utf8_buffer .front_index ];
650
- // Front index wraps around in the case of 4-byte sequence (non-BMP code point)
651
- handle_data .utf8_buffer .front_index +%= 1 ;
652
- }
653
- bytes_read += @truncate (max_bytes_to_read );
654
- handle_data .utf8_buffer .bytes_used -= @truncate (max_bytes_to_read );
655
- if (has_enough_space ) {
656
- // UTF-8 buffer is now empty, we can safely reset front_index to zero
657
- handle_data .utf8_buffer .front_index = 0 ;
658
- } else {
659
- break ;
629
+ assert (handle_data .utf8_buffer .front_index == 0 );
630
+ while (bytes_read_into_temp_buffer < temp_buffer .len ) {
631
+ // Read only one code unit each loop
632
+ var utf16_code_unit : u16 = undefined ;
633
+ var utf16_code_units_read : DWORD = undefined ;
634
+ if (kernel32 .ReadConsoleW (hConsoleInput , & utf16_code_unit , 1 , & utf16_code_units_read , null ) == FALSE ) {
635
+ switch (kernel32 .GetLastError ()) {
636
+ .INVALID_HANDLE = > return error .NotOpenForReading ,
637
+ else = > | err | return unexpectedError (err ),
638
+ }
639
+ }
640
+ if (utf16_code_unit == 0x000D ) {
641
+ // CR should always be followed by an LF, so just discard it
642
+ continue ;
643
+ } else if (utf16_code_unit >= 0xD800 and utf16_code_unit <= 0xDBFF ) {
644
+ // When a high surrogate is encountered, store it into the UTF-16 buffer
645
+ assert (handle_data .utf16_buffer .code_units_used == 0 );
646
+ handle_data .utf16_buffer .data [0 ] = utf16_code_unit ;
647
+ handle_data .utf16_buffer .code_units_used = 1 ;
648
+ continue ;
649
+ } else if (utf16_code_unit >= 0xDC00 and utf16_code_unit <= 0xDFFF ) {
650
+ // When a low surrogate is encountered, assemble surrogate pair and convert to UTF-8
651
+ if (! (utf16_code_units_read == 1 and handle_data .utf16_buffer .data [0 ] >= 0xD800 and handle_data .utf16_buffer .data [0 ] <= 0xDBFF )) {
652
+ unreachable ;
653
+ }
654
+ handle_data .utf16_buffer .data [1 ] = utf16_code_unit ;
655
+ handle_data .utf16_buffer .code_units_used = 0 ;
656
+ const utf8_bytes : usize = std .unicode .utf16leToUtf8 (& handle_data .utf8_buffer .data , & handle_data .utf16_buffer .data ) catch return error .Unexpected ;
657
+ assert (utf8_bytes == 4 );
658
+ handle_data .utf8_buffer .bytes_used = 4 ;
659
+ } else {
660
+ assert (handle_data .utf16_buffer .code_units_used == 0 );
661
+ const utf8_bytes : usize = std .unicode .utf16leToUtf8 (& handle_data .utf8_buffer .data , @as (* [1 ]u16 , & utf16_code_unit )) catch return error .Unexpected ;
662
+ handle_data .utf8_buffer .bytes_used = @truncate (utf8_bytes );
663
+ if (handle_data .utf8_buffer .bytes_used == 1 ) {
664
+ if (handle_data .utf8_buffer .data [0 ] == 0x0A ) {
665
+ reached_end_of_line = true ;
666
+ } else if (handle_data .utf8_buffer .data [0 ] == 0x1A ) {
667
+ if (truncate_state == .do_not_truncate ) {
668
+ truncate_state = .truncate_after_SUB ;
669
+ truncate_index = bytes_read_into_temp_buffer + 1 ;
670
+ }
671
+ }
672
+ }
673
+ }
674
+ // Is there enough space for all bytes in UTF-8 buffer?
675
+ has_enough_space_in_remaining_buffer = remaining_buffer .len >= bytes_read_into_temp_buffer + handle_data .utf8_buffer .bytes_used ;
676
+ const has_enough_space : bool = has_enough_space_in_remaining_buffer and temp_buffer .len >= bytes_read_into_temp_buffer + handle_data .utf8_buffer .bytes_used ;
677
+ const max_bytes_to_read : usize = if (has_enough_space ) handle_data .utf8_buffer .bytes_used else remaining_buffer .len - bytes_read_into_temp_buffer ;
678
+ for (0.. max_bytes_to_read ) | index | {
679
+ temp_buffer [bytes_read_into_temp_buffer + index ] = handle_data .utf8_buffer .data [handle_data .utf8_buffer .front_index ];
680
+ // Front index wraps around in the case of 4-byte sequence (non-BMP code point)
681
+ handle_data .utf8_buffer .front_index +%= 1 ;
682
+ }
683
+ bytes_read_into_temp_buffer += @truncate (max_bytes_to_read );
684
+ handle_data .utf8_buffer .bytes_used -= @truncate (max_bytes_to_read );
685
+ if (has_enough_space ) {
686
+ // UTF-8 buffer is now empty, we can safely reset front_index to zero
687
+ handle_data .utf8_buffer .front_index = 0 ;
688
+ } else {
689
+ break ;
690
+ }
691
+ // LF ends a console read immediately
692
+ if (reached_end_of_line ) {
693
+ break ;
694
+ }
660
695
}
661
- // LF ends a console read immediately
662
- if (reached_end_of_line ) {
696
+ // Copy to user-provided buffer
697
+ const bytes_copied : DWORD = switch (truncate_state ) {
698
+ .do_not_truncate = > bytes_read_into_temp_buffer ,
699
+ .truncate_after_SUB = > truncate_index ,
700
+ .truncate_all = > 0 ,
701
+ };
702
+ @memcpy (remaining_buffer [0.. bytes_copied ], temp_buffer [0.. bytes_copied ]);
703
+ bytes_read += bytes_copied ;
704
+ // Early return conditions
705
+ if (! has_enough_space_in_remaining_buffer or reached_end_of_line ) {
663
706
break ;
664
707
}
665
708
}
666
- return ReadConsoleProcessUtf8Buffer (buffer , temp_buffer , bytes_read , true );
667
- }
668
-
669
- fn ReadConsoleProcessUtf8Buffer (buffer : []u8 , temp_buffer : []u8 , bytes_read : DWORD , comptime truncate_after_SUB : bool ) DWORD {
670
- if (truncate_after_SUB ) {
671
- // Truncate everything after the SUB (Ctrl+Z) character
672
- var index : DWORD = 0 ;
673
- var reached_end_of_file : bool = false ;
674
- while (index < bytes_read and ! reached_end_of_file ) {
675
- if (temp_buffer [index ] == 0x1A ) {
676
- reached_end_of_file = true ;
677
- }
678
- buffer [index ] = temp_buffer [index ];
679
- index += 1 ;
680
- }
681
- return index ;
682
- } else {
683
- std .mem .copy (u8 , buffer , temp_buffer );
684
- return bytes_read ;
685
- }
709
+ return bytes_read ;
686
710
}
687
711
688
712
pub const WriteFileError = error {
@@ -809,7 +833,7 @@ fn WriteConsoleWithUtf8ToUtf16Conversion(handle: HANDLE, bytes: []const u8) Writ
809
833
bytes_written += @truncate (bytes_available );
810
834
return bytes_written ;
811
835
} else {
812
- utf16_code_units = std .unicode .utf8ToUtf16Le (& utf16_buffer , bytes [byte_index .. byte_index + utf8_byte_sequence_length ]) catch return error .InvalidUtf8 ;
836
+ utf16_code_units = std .unicode .utf8ToUtf16Le (& utf16_buffer , bytes [byte_index .. byte_index + utf8_byte_sequence_length ]) catch return error .InvalidUtf8 ;
813
837
byte_index += utf8_byte_sequence_length ;
814
838
}
815
839
} else {
0 commit comments