@@ -44,186 +44,133 @@ function StringDecoder(encoding) {
44
44
var nb ;
45
45
switch ( this . encoding ) {
46
46
case 'utf16le' :
47
- this . text = utf16Text ;
48
- this . end = utf16End ;
47
+ this . complete = utf16Complete ;
48
+ this . flush = simpleFlush ;
49
49
// fall through
50
50
case 'utf8' :
51
51
nb = 4 ;
52
52
break ;
53
53
case 'base64' :
54
- this . text = base64Text ;
55
- this . end = base64End ;
54
+ this . complete = base64Complete ;
55
+ this . flush = simpleFlush ;
56
56
nb = 3 ;
57
57
break ;
58
58
default :
59
59
this . write = simpleWrite ;
60
60
this . end = simpleEnd ;
61
61
return ;
62
62
}
63
- this . lastNeed = 0 ;
64
- this . lastTotal = 0 ;
63
+ this . partial = 0 ;
65
64
this . lastChar = Buffer . allocUnsafe ( nb ) ;
66
65
}
67
66
68
67
// Decodes the bytes in `buf` and returns every character that is complete so
// far. Bytes belonging to a character that is still incomplete are kept in
// this.lastChar (this.partial of them) and are prepended to the next call.
StringDecoder.prototype.write = function(buf) {
  if (buf.length === 0)
    return '';

  const carried = this.partial;
  if (!carried)
    return this.text(buf, 0, buf.length);

  // Bytes of an unfinished character are waiting in lastChar. Top lastChar up
  // with as many fresh bytes as it has room for (or as many as buf offers).
  const room = this.lastChar.length - carried;
  const borrowed = Math.min(buf.length, room);
  buf.copy(this.lastChar, carried, 0, borrowed);

  // Decode the combined head. text() rewrites this.partial as a side effect,
  // so from here on this.partial describes what the head left undecoded.
  const head = this.text(this.lastChar, 0, carried + borrowed);

  // If more bytes are pending than were borrowed, some of the previously
  // carried bytes are still undecoded and buf has been used up entirely.
  if (this.partial > borrowed)
    return head;

  // Otherwise every carried byte was consumed; resume in buf at the first
  // byte that the head did not decode.
  return head + this.text(buf, borrowed - this.partial, buf.length);
};
86
85
87
// Decodes buf[start, end) and returns only the complete characters in it.
//
// this.complete(buf, start, end) decides where the complete characters stop.
// The bytes after that position are recorded as pending: this.partial is set
// to their count and they are stored at the front of this.lastChar, so the
// next write() can prepend them to its input.
//
// An empty range returns '' without touching any state.
StringDecoder.prototype.text = function(buf, start, end) {
  if (start === end)
    return '';

  const complete = this.complete(buf, start, end);
  const pending = end - complete;
  this.partial = pending;

  // Decode before moving anything: when buf is lastChar itself, shifting the
  // pending bytes to the front would overwrite the bytes being decoded.
  const decoded = (start === complete) ?
    '' : buf.toString(this.encoding, start, complete);

  // Pending bytes must always end up at lastChar[0, pending). That is already
  // the case only when buf is lastChar and nothing before them was decoded.
  // Previously the move was skipped whenever buf was lastChar, which left
  // stale bytes at the front once a leading part of lastChar had been decoded.
  // Buffer#copy is safe for overlapping regions of the same buffer.
  if (pending !== 0 && (buf !== this.lastChar || complete !== 0))
    buf.copy(this.lastChar, 0, complete, end);

  return decoded;
};
91
98
92
// Finishes decoding: optionally decodes one last chunk via write(), then
// appends whatever flush() produces for the bytes of a character that never
// got completed, and clears the pending-byte count.
StringDecoder.prototype.end = function(buf) {
  let out = '';
  if (buf && buf.length)
    out = this.write(buf);

  // Nothing pending: the decoded text is the whole answer.
  if (!this.partial)
    return out;

  out += this.flush();
  this.partial = 0;
  return out;
};
101
108
102
// Prototype-level defaults for the two per-encoding hooks:
//
//   complete(buf, start, end) - returns the largest n <= end such that
//       buf.slice(start, n) contains only complete characters.
//   flush() - returns a string standing in for the this.partial bytes held
//       in this.lastChar that form an incomplete character.
//
// The UTF-8 versions are installed here.
Object.assign(StringDecoder.prototype, {
  complete: utf8Complete,
  flush: utf8Flush,
});
115
116
116
117
// Inspects at most the last 3 bytes of buf[start, end) for an incomplete
// UTF-8 character and returns the position just after the last complete one.
//
// buf:   bytes to inspect (indexable, e.g. a Buffer)
// start: first index of the range
// end:   index one past the last byte of the range
//
// Returns the index of the leading byte of a trailing incomplete sequence if
// there is one (decoding should resume there next time), otherwise `end`.
// The result is never smaller than `start`.
function utf8Complete(buf, start, end) {
  // A UTF-8 character is at most 4 bytes long, so an incomplete one has at
  // most 3 bytes; only that window needs scanning. The window must also stay
  // inside the requested range. The previous clamp was inverted
  // (`start > end - 3`): it widened short ranges to bytes before `start`
  // and never narrowed long ones.
  const limit = Math.max(start, end - 3);

  for (let i = end - 1; i >= limit; --i) {
    const byte = buf[i];
    if (byte >> 6 === 0x02)
      continue;  // continuation byte, keep looking for its leading byte

    let numBytes;
    if (byte >> 5 === 0x06)
      numBytes = 2;
    else if (byte >> 4 === 0x0E)
      numBytes = 3;
    else if (byte >> 3 === 0x1E)
      numBytes = 4;
    else
      numBytes = 1;  // ASCII or invalid

    // If the sequence runs past `end` it is incomplete, so continue the next
    // run at its leading byte. Otherwise it is complete, possibly followed by
    // garbage continuation bytes, and everything can be converted.
    return (i + numBytes > end) ? i : end;
  }

  // The window held only continuation bytes (or was empty): the range ends in
  // a valid 4-byte sequence or in invalid continuation bytes. Either way the
  // input is complete, so convert it as is.
  return end;
}
160
144
161
145
// UTF-8 flush hook: every byte still buffered for an unfinished character is
// represented in the output by one U+FFFD replacement character.
// Reads this.partial (the number of buffered bytes); returns the string.
function utf8Flush() {
  const pendingBytes = this.partial;
  const replacement = '\ufffd';
  return replacement.repeat(pendingBytes);
}
169
150
170
151
// UTF-16LE `complete` hook. A character normally takes two bytes, so an odd
// trailing byte is always incomplete. Even with an even byte count, the last
// code unit may be a leading/high surrogate (high byte 0xD8..0xDB); it has to
// wait for the two bytes of its trailing surrogate before it can be decoded.
//
// Returns the index just past the last byte of buf[start, end) that belongs
// to a complete character.
function utf16Complete(buf, start, end) {
  // Drop a dangling odd byte so that the range holds whole code units only.
  const evenEnd = end - ((end - start) & 1);
  if (evenEnd <= start)
    return evenEnd;

  // Little-endian: the high byte of the last code unit is the final byte.
  const highByte = buf[evenEnd - 1];
  const endsOnHighSurrogate = highByte >= 0xD8 && highByte <= 0xDB;
  return endsOnHighSurrogate ? evenEnd - 2 : evenEnd;
}
194
165
195
// Base64 `complete` hook: input is encoded in groups of three bytes, so the
// complete part of buf[start, end) is the longest prefix whose length is a
// multiple of three. Returns the index just past that prefix.
function base64Complete(buf, start, end) {
  const leftover = (end - start) % 3;
  return end - leftover;
}
205
169
206
// Flush hook shared by UTF-16LE and Base64. No replacement characters are
// added explicitly for a trailing partial character; the buffered bytes
// (the first this.partial bytes of this.lastChar) are simply handed to the
// regular Buffer-to-string conversion for this.encoding, leaving it to v8
// to deal with the incomplete input.
function simpleFlush() {
  const pendingBytes = this.partial;
  const stash = this.lastChar;
  return stash.toString(this.encoding, 0, pendingBytes);
}
228
175
229
176
// Pass bytes on through for single-byte encodings (e.g. ascii, latin1, hex)
0 commit comments