Skip to content

Commit 3be258c

Browse files
trevnorris authored and stefanmb committed
buffer: add encoding parameter to fill()
Can now call fill() using the following parameters if value is a String: fill(string[, start[, end]][, encoding]). And with the following if value is a Buffer: fill(buffer[, start[, end]]). The encoding is ignored if value is not a String. All other non-Buffer values are coerced to a uint32. Multibyte strings will simply be copied into the Buffer until the number of bytes runs out, meaning partial strings can be left behind: Buffer(3).fill('\u0222'); // returns: <Buffer c8 a2 c8> In some encoding cases, such as 'hex', fill() will throw if the input string is not valid. PR-URL: nodejs#4935 Reviewed-By: James M Snell <[email protected]>
1 parent 5341f13 commit 3be258c

File tree

6 files changed

+411
-32
lines changed

6 files changed

+411
-32
lines changed

doc/api/buffer.markdown

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -471,23 +471,38 @@ console.log(buf1.equals(buf3));
471471
// Prints: false
472472
```
473473

474-
### buf.fill(value[, offset[, end]])
474+
### buf.fill(value[, offset[, end]][, encoding])
475475

476-
* `value` {String|Number}
476+
* `value` {String|Buffer|Number}
477477
* `offset` {Number} Default: 0
478-
* `end` {Number} Default: `buffer.length`
478+
* `end` {Number} Default: `buf.length`
479+
* `encoding` {String} Default: `'utf8'`
479480
* Return: {Buffer}
480481

481-
Fills the Buffer with the specified value. If the `offset` and `end` are not
482-
given it will fill the entire Buffer. The method returns a reference to the
483-
Buffer so calls can be chained.
482+
Fills the Buffer with the specified value. If the `offset` (defaults to `0`)
483+
and `end` (defaults to `buf.length`) are not given, the entire buffer will be
484+
filled. The method returns a reference to the Buffer, so calls can be chained.
485+
This is meant as a small simplification to creating a Buffer, allowing the
486+
creation and fill of the Buffer to be done on a single line:
484487

485488
```js
486489
const b = new Buffer(50).fill('h');
487490
console.log(b.toString());
488491
// Prints: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
489492
```
490493

494+
`encoding` is only relevant if `value` is a string. Otherwise it is ignored.
495+
`value` is coerced to a `uint32` value if it is not a String or Buffer.
496+
497+
The `fill()` operation writes bytes into the Buffer dumbly. If the final write
498+
falls in the middle of a multi-byte character, then whatever bytes fit into the buffer
499+
are written.
500+
501+
```js
502+
Buffer(3).fill('\u0222');
503+
// Prints: <Buffer c8 a2 c8>
504+
```
505+
491506
### buf.indexOf(value[, byteOffset][, encoding])
492507

493508
* `value` {String|Buffer|Number}

lib/buffer.js

Lines changed: 35 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -498,24 +498,48 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) {
498498
};
499499

500500

501-
Buffer.prototype.fill = function fill(val, start, end) {
502-
start = start >> 0;
503-
end = (end === undefined) ? this.length : end >> 0;
501+
// Usage:
502+
// buffer.fill(number[, offset[, end]])
503+
// buffer.fill(buffer[, offset[, end]])
504+
// buffer.fill(string[, offset[, end]][, encoding])
505+
Buffer.prototype.fill = function fill(val, start, end, encoding) {
506+
// Handle string cases:
507+
if (typeof val === 'string') {
508+
if (typeof start === 'string') {
509+
encoding = start;
510+
start = 0;
511+
end = this.length;
512+
} else if (typeof end === 'string') {
513+
encoding = end;
514+
end = this.length;
515+
}
516+
if (val.length === 1) {
517+
var code = val.charCodeAt(0);
518+
if (code < 256)
519+
val = code;
520+
}
521+
if (encoding !== undefined && typeof encoding !== 'string') {
522+
throw new TypeError('encoding must be a string');
523+
}
524+
if (typeof encoding === 'string' && !Buffer.isEncoding(encoding)) {
525+
throw new TypeError('Unknown encoding: ' + encoding);
526+
}
504527

528+
} else if (typeof val === 'number') {
529+
val = val & 255;
530+
}
531+
532+
// Invalid ranges are not set to a default, so can range check early.
505533
if (start < 0 || end > this.length)
506534
throw new RangeError('Out of range index');
535+
507536
if (end <= start)
508537
return this;
509538

510-
if (typeof val !== 'string') {
511-
val = val >>> 0;
512-
} else if (val.length === 1) {
513-
var code = val.charCodeAt(0);
514-
if (code < 256)
515-
val = code;
516-
}
539+
start = start >>> 0;
540+
end = end === undefined ? this.length : end >>> 0;
517541

518-
binding.fill(this, val, start, end);
542+
binding.fill(this, val, start, end, encoding);
519543

520544
return this;
521545
};

src/node_buffer.cc

Lines changed: 64 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -570,42 +570,91 @@ void Copy(const FunctionCallbackInfo<Value> &args) {
570570

571571

572572
void Fill(const FunctionCallbackInfo<Value>& args) {
573-
THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
573+
Environment* env = Environment::GetCurrent(args);
574+
575+
THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
574576
SPREAD_ARG(args[0], ts_obj);
575577

576578
size_t start = args[2]->Uint32Value();
577579
size_t end = args[3]->Uint32Value();
578-
size_t length = end - start;
579-
CHECK(length + start <= ts_obj_length);
580+
size_t fill_length = end - start;
581+
Local<String> str_obj;
582+
size_t str_length;
583+
enum encoding enc;
584+
CHECK(fill_length + start <= ts_obj_length);
585+
586+
// First check if Buffer has been passed.
587+
if (Buffer::HasInstance(args[1])) {
588+
SPREAD_ARG(args[1], fill_obj);
589+
str_length = fill_obj_length;
590+
memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
591+
goto start_fill;
592+
}
580593

581-
if (args[1]->IsNumber()) {
594+
// Then coerce everything that's not a string.
595+
if (!args[1]->IsString()) {
582596
int value = args[1]->Uint32Value() & 255;
583-
memset(ts_obj_data + start, value, length);
597+
memset(ts_obj_data + start, value, fill_length);
584598
return;
585599
}
586600

587-
node::Utf8Value str(args.GetIsolate(), args[1]);
588-
size_t str_length = str.length();
589-
size_t in_there = str_length;
590-
char* ptr = ts_obj_data + start + str_length;
601+
str_obj = args[1]->ToString(env->isolate());
602+
enc = ParseEncoding(env->isolate(), args[4], UTF8);
603+
str_length =
604+
enc == UTF8 ? str_obj->Utf8Length() :
605+
enc == UCS2 ? str_obj->Length() * sizeof(uint16_t) : str_obj->Length();
606+
607+
if (enc == HEX && str_length % 2 != 0)
608+
return env->ThrowTypeError("Invalid hex string");
591609

592610
if (str_length == 0)
593611
return;
594612

595-
memcpy(ts_obj_data + start, *str, MIN(str_length, length));
613+
// Can't use StringBytes::Write() in all cases. For example if attempting
614+
// to write a two byte character into a one byte Buffer.
615+
if (enc == UTF8) {
616+
node::Utf8Value str(env->isolate(), args[1]);
617+
memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
596618

597-
if (str_length >= length)
619+
} else if (enc == UCS2) {
620+
node::TwoByteValue str(env->isolate(), args[1]);
621+
memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
622+
623+
} else {
624+
// Write initial String to Buffer, then use that memory to copy remainder
625+
// of string. Correct the string length for cases like HEX where less than
626+
// the total string length is written.
627+
str_length = StringBytes::Write(env->isolate(),
628+
ts_obj_data + start,
629+
fill_length,
630+
str_obj,
631+
enc,
632+
nullptr);
633+
// This check is also needed in case Write() returns that no bytes could
634+
// be written.
635+
// TODO(trevnorris): Should this throw? Because if the string length was
636+
// greater than 0 but couldn't be written then the string was invalid.
637+
if (str_length == 0)
638+
return;
639+
}
640+
641+
start_fill:
642+
643+
if (str_length >= fill_length)
598644
return;
599645

600-
while (in_there < length - in_there) {
646+
647+
size_t in_there = str_length;
648+
char* ptr = ts_obj_data + start + str_length;
649+
650+
while (in_there < fill_length - in_there) {
601651
memcpy(ptr, ts_obj_data + start, in_there);
602652
ptr += in_there;
603653
in_there *= 2;
604654
}
605655

606-
if (in_there < length) {
607-
memcpy(ptr, ts_obj_data + start, length - in_there);
608-
in_there = length;
656+
if (in_there < fill_length) {
657+
memcpy(ptr, ts_obj_data + start, fill_length - in_there);
609658
}
610659
}
611660

src/util.cc

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,27 @@ Utf8Value::Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value)
2525
str_[length_] = '\0';
2626
}
2727

28+
29+
TwoByteValue::TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value)
30+
: length_(0), str_(str_st_) {
31+
if (value.IsEmpty())
32+
return;
33+
34+
v8::Local<v8::String> string = value->ToString(isolate);
35+
if (string.IsEmpty())
36+
return;
37+
38+
// Allocate enough space to include the null terminator
39+
size_t len = StringBytes::StorageSize(isolate, string, UCS2) + 1;
40+
if (len > sizeof(str_st_)) {
41+
str_ = static_cast<uint16_t*>(malloc(len));
42+
CHECK_NE(str_, nullptr);
43+
}
44+
45+
const int flags =
46+
v8::String::NO_NULL_TERMINATION | v8::String::REPLACE_INVALID_UTF8;
47+
length_ = string->Write(str_, 0, len, flags);
48+
str_[length_] = '\0';
49+
}
50+
2851
} // namespace node

src/util.h

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,33 @@ class Utf8Value {
205205
char str_st_[1024];
206206
};
207207

208+
class TwoByteValue {
209+
public:
210+
explicit TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value);
211+
212+
~TwoByteValue() {
213+
if (str_ != str_st_)
214+
free(str_);
215+
}
216+
217+
uint16_t* operator*() {
218+
return str_;
219+
};
220+
221+
const uint16_t* operator*() const {
222+
return str_;
223+
};
224+
225+
size_t length() const {
226+
return length_;
227+
};
228+
229+
private:
230+
size_t length_;
231+
uint16_t* str_;
232+
uint16_t str_st_[1024];
233+
};
234+
208235
} // namespace node
209236

210237
#endif // SRC_UTIL_H_

0 commit comments

Comments
 (0)