16
16
#include < string>
17
17
#include < memory>
18
18
#include < iostream>
19
+ #include < cstring>
20
+ #include < iterator>
21
+ #include < algorithm>
19
22
20
23
PYBIND11_NAMESPACE_BEGIN (PYBIND11_NAMESPACE)
21
24
PYBIND11_NAMESPACE_BEGIN(detail)
@@ -38,25 +41,73 @@ class pythonbuf : public std::streambuf {
38
41
return sync () == 0 ? traits_type::not_eof (c) : traits_type::eof ();
39
42
}
40
43
44
+ // Computes how many bytes at the end of the buffer are part of an
45
+ // incomplete sequence of UTF-8 bytes.
46
+ // Precondition: pbase() < pptr()
47
+ size_t utf8_remainder () const {
48
+ const auto rbase = std::reverse_iterator<char *>(pbase ());
49
+ const auto rpptr = std::reverse_iterator<char *>(pptr ());
50
+ auto is_ascii = [](char c) {
51
+ return (static_cast <unsigned char >(c) & 0x80 ) == 0x00 ;
52
+ };
53
+ auto is_leading = [](char c) {
54
+ return (static_cast <unsigned char >(c) & 0xC0 ) == 0xC0 ;
55
+ };
56
+ auto is_leading_2b = [](char c) {
57
+ return static_cast <unsigned char >(c) <= 0xDF ;
58
+ };
59
+ auto is_leading_3b = [](char c) {
60
+ return static_cast <unsigned char >(c) <= 0xEF ;
61
+ };
62
+ // If the last character is ASCII, there are no incomplete code points
63
+ if (is_ascii (*rpptr))
64
+ return 0 ;
65
+ // Otherwise, work back from the end of the buffer and find the first
66
+ // UTF-8 leading byte
67
+ const auto rpend = rbase - rpptr >= 3 ? rpptr + 3 : rbase;
68
+ const auto leading = std::find_if (rpptr, rpend, is_leading);
69
+ if (leading == rbase)
70
+ return 0 ;
71
+ const auto dist = static_cast <size_t >(leading - rpptr);
72
+ size_t remainder = 0 ;
73
+
74
+ if (dist == 0 )
75
+ remainder = 1 ; // 1-byte code point is impossible
76
+ else if (dist == 1 )
77
+ remainder = is_leading_2b (*leading) ? 0 : dist + 1 ;
78
+ else if (dist == 2 )
79
+ remainder = is_leading_3b (*leading) ? 0 : dist + 1 ;
80
+ // else if (dist >= 3), at least 4 bytes before encountering an UTF-8
81
+ // leading byte, either no remainder or invalid UTF-8.
82
+ // Invalid UTF-8 will cause an exception later when converting
83
+ // to a Python string, so that's not handled here.
84
+ return remainder ;
85
+ }
86
+
41
87
// This function must be non-virtual to be called in a destructor. If the
42
88
// rare MSVC test failure shows up with this version, then this should be
43
89
// simplified to a fully qualified call.
44
90
int _sync () {
45
- if (pbase () != pptr ()) {
46
-
47
- {
48
- gil_scoped_acquire tmp;
49
-
91
+ if (pbase () != pptr ()) { // If buffer is not empty
92
+ gil_scoped_acquire tmp;
93
+ // Placed inside gil_scoped_acquire as a mutex to avoid a race.
94
+ if (pbase () != pptr ()) { // Check again under the lock
50
95
// This subtraction cannot be negative, so dropping the sign.
51
- str line (pbase (), static_cast <size_t >(pptr () - pbase ()));
52
-
53
- pywrite (line);
54
- pyflush ();
55
-
56
- // Placed inside gil_scoped_aquire as a mutex to avoid a race
96
+ auto size = static_cast <size_t >(pptr () - pbase ());
97
+ size_t remainder = utf8_remainder ();
98
+
99
+ if (size > remainder ) {
100
+ str line (pbase (), size - remainder );
101
+ pywrite (line);
102
+ pyflush ();
103
+ }
104
+
105
+ // Copy the remainder at the end of the buffer to the beginning:
106
+ if (remainder > 0 )
107
+ std::memmove (pbase (), pptr () - remainder , remainder );
57
108
setp (pbase (), epptr ());
109
+ pbump (static_cast <int >(remainder ));
58
110
}
59
-
60
111
}
61
112
return 0 ;
62
113
}
0 commit comments