buffer: add encoding parameter to fill()

trevnorris · stefanmb · commit 3be258c98eb1 · 2016-02-23T14:03:08.000-05:00
fill() can now be called with the following parameters if value is a String: fill(string[, start[, end]][, encoding]). And with the following if value is a Buffer: fill(buffer[, start[, end]]). The encoding is ignored if value is not a String. All other non-Buffer values are coerced to a uint32. Multibyte strings will simply be copied into the Buffer until the number of bytes runs out, meaning partial characters can be left behind: Buffer(3).fill('\u0222'); // returns: <Buffer c8 a2 c8> In some encoding cases, such as 'hex', fill() will throw if the input string is not valid. PR-URL: nodejs#4935 Reviewed-By: James M Snell <jasnell@gmail.com>
diff --git a/doc/api/buffer.markdown b/doc/api/buffer.markdown
@@ -471,23 +471,38 @@ console.log(buf1.equals(buf3));
   // Prints: false
 ```
 
-### buf.fill(value[, offset[, end]])
+### buf.fill(value[, offset[, end]][, encoding])
 
-* `value` {String|Number}
+* `value` {String|Buffer|Number}
 * `offset` {Number} Default: 0
-* `end` {Number} Default: `buffer.length`
+* `end` {Number} Default: `buf.length`
+* `encoding` {String} Default: `'utf8'`
 * Return: {Buffer}
 
-Fills the Buffer with the specified value. If the `offset` and `end` are not
-given it will fill the entire Buffer. The method returns a reference to the
-Buffer so calls can be chained.
+Fills the Buffer with the specified value. If the `offset` (defaults to `0`)
+and `end` (defaults to `buf.length`) are not given, the entire buffer will be
+filled. The method returns a reference to the Buffer, so calls can be chained.
+This is meant as a small simplification of Buffer creation, allowing the
+creation and fill of the Buffer to be done on a single line:
 
 ```js
 const b = new Buffer(50).fill('h');
 console.log(b.toString());
   // Prints: hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh
 ```
 
+`encoding` is only relevant if `value` is a String. Otherwise it is ignored.
+`value` is coerced to a `uint32` value if it is not a String, Buffer, or Number.
+
+The `fill()` operation writes bytes into the Buffer blindly. If the final write
+falls in the middle of a multi-byte character, then whatever bytes fit into the
+buffer are written.
+
+```js
+Buffer(3).fill('\u0222');
+  // Prints: <Buffer c8 a2 c8>
+```
+
 ### buf.indexOf(value[, byteOffset][, encoding])
 
 * `value` {String|Buffer|Number}
diff --git a/lib/buffer.js b/lib/buffer.js
@@ -498,24 +498,58 @@ Buffer.prototype.includes = function includes(val, byteOffset, encoding) {
 };
 
 
-Buffer.prototype.fill = function fill(val, start, end) {
-  start = start >> 0;
-  end = (end === undefined) ? this.length : end >> 0;
+// Fills this Buffer, or the [start, end) range of it, with `val` and returns
+// `this` so calls can be chained.
+//
+// Usage:
+//    buffer.fill(number[, offset[, end]])
+//    buffer.fill(buffer[, offset[, end]])
+//    buffer.fill(string[, offset[, end]][, encoding])
+//
+// Throws a TypeError if the encoding is not a string or is unknown, and a
+// RangeError if start < 0 or end > this.length.
+Buffer.prototype.fill = function fill(val, start, end, encoding) {
+  // Handle string cases:
+  if (typeof val === 'string') {
+    // The encoding may be passed in place of `start` or `end`.
+    if (typeof start === 'string') {
+      encoding = start;
+      start = 0;
+      end = this.length;
+    } else if (typeof end === 'string') {
+      encoding = end;
+      end = this.length;
+    }
+    // Single byte fast path. Only taken when no encoding was given: with an
+    // explicit encoding (e.g. 'ucs2' or 'hex') a one character string does
+    // not necessarily map to its char code, so the binding must decode it.
+    if (encoding === undefined && val.length === 1) {
+      var code = val.charCodeAt(0);
+      if (code < 256)
+        val = code;
+    }
+    if (encoding !== undefined && typeof encoding !== 'string') {
+      throw new TypeError('encoding must be a string');
+    }
+    if (typeof encoding === 'string' && !Buffer.isEncoding(encoding)) {
+      throw new TypeError('Unknown encoding: ' + encoding);
+    }
 
+  } else if (typeof val === 'number') {
+    val = val & 255;
+  }
+
+  // Invalid ranges are not set to a default, so can range check early.
   if (start < 0 || end > this.length)
     throw new RangeError('Out of range index');
+
   if (end <= start)
     return this;
 
-  if (typeof val !== 'string') {
-    val = val >>> 0;
-  } else if (val.length === 1) {
-    var code = val.charCodeAt(0);
-    if (code < 256)
-      val = code;
-  }
+  start = start >>> 0;
+  end = end === undefined ? this.length : end >>> 0;
 
-  binding.fill(this, val, start, end);
+  binding.fill(this, val, start, end, encoding);
 
   return this;
 };
diff --git a/src/node_buffer.cc b/src/node_buffer.cc
@@ -570,42 +570,106 @@ void Copy(const FunctionCallbackInfo<Value> &args) {
 
 
+// Fills the [start, end) byte range of the Buffer in args[0] with the value
+// in args[1], repeating that value as often as needed.
+//   args[1]  fill value: a Buffer, a String, or anything else (which is
+//            coerced to a uint32 and masked down to one byte)
+//   args[2]  start offset, args[3]  end offset (both read as uint32)
+//   args[4]  encoding, only consulted for String values (UTF8 is passed to
+//            ParseEncoding as the default)
+// Zero-length Buffer and String values leave the target untouched.
 void Fill(const FunctionCallbackInfo<Value>& args) {
-  THROW_AND_RETURN_UNLESS_BUFFER(Environment::GetCurrent(args), args[0]);
+  Environment* env = Environment::GetCurrent(args);
+
+  THROW_AND_RETURN_UNLESS_BUFFER(env, args[0]);
   SPREAD_ARG(args[0], ts_obj);
 
   size_t start = args[2]->Uint32Value();
   size_t end = args[3]->Uint32Value();
-  size_t length = end - start;
-  CHECK(length + start <= ts_obj_length);
+  size_t fill_length = end - start;
+  Local<String> str_obj;
+  size_t str_length;
+  enum encoding enc;
+  CHECK(fill_length + start <= ts_obj_length);
+
+  // First check if Buffer has been passed.
+  if (Buffer::HasInstance(args[1])) {
+    SPREAD_ARG(args[1], fill_obj);
+    str_length = fill_obj_length;
+    // An empty Buffer has nothing to replicate. Returning here also keeps the
+    // doubling loop below from spinning forever with in_there stuck at 0.
+    if (str_length == 0)
+      return;
+    memcpy(ts_obj_data + start, fill_obj_data, MIN(str_length, fill_length));
+    goto start_fill;
+  }
 
-  if (args[1]->IsNumber()) {
+  // Then coerce everything that's not a string.
+  if (!args[1]->IsString()) {
     int value = args[1]->Uint32Value() & 255;
-    memset(ts_obj_data + start, value, length);
+    memset(ts_obj_data + start, value, fill_length);
     return;
   }
 
-  node::Utf8Value str(args.GetIsolate(), args[1]);
-  size_t str_length = str.length();
-  size_t in_there = str_length;
-  char* ptr = ts_obj_data + start + str_length;
+  str_obj = args[1]->ToString(env->isolate());
+  enc = ParseEncoding(env->isolate(), args[4], UTF8);
+  str_length =
+      enc == UTF8 ? str_obj->Utf8Length() :
+      enc == UCS2 ? str_obj->Length() * sizeof(uint16_t) : str_obj->Length();
+
+  if (enc == HEX && str_length % 2 != 0)
+    return env->ThrowTypeError("Invalid hex string");
 
   if (str_length == 0)
     return;
 
-  memcpy(ts_obj_data + start, *str, MIN(str_length, length));
+  // Can't use StringBytes::Write() in all cases. For example if attempting
+  // to write a two byte character into a one byte Buffer.
+  if (enc == UTF8) {
+    node::Utf8Value str(env->isolate(), args[1]);
+    memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
 
-  if (str_length >= length)
+  } else if (enc == UCS2) {
+    node::TwoByteValue str(env->isolate(), args[1]);
+    memcpy(ts_obj_data + start, *str, MIN(str_length, fill_length));
+
+  } else {
+    // Write initial String to Buffer, then use that memory to copy remainder
+    // of string. Correct the string length for cases like HEX where less than
+    // the total string length is written.
+    str_length = StringBytes::Write(env->isolate(),
+                                    ts_obj_data + start,
+                                    fill_length,
+                                    str_obj,
+                                    enc,
+                                    nullptr);
+    // This check is also needed in case Write() returns that no bytes could
+    // be written.
+    // TODO(trevnorris): Should this throw? Because of the string length was
+    // greater than 0 but couldn't be written then the string was invalid.
+    if (str_length == 0)
+      return;
+  }
+
+ start_fill:
+
+  if (str_length >= fill_length)
     return;
 
-  while (in_there < length - in_there) {
+
+  size_t in_there = str_length;
+  char* ptr = ts_obj_data + start + str_length;
+
+  // The first str_length bytes are in place. Copy the already filled prefix
+  // onto the bytes right after it, doubling the filled size on every pass.
+  while (in_there < fill_length - in_there) {
     memcpy(ptr, ts_obj_data + start, in_there);
     ptr += in_there;
     in_there *= 2;
   }
 
-  if (in_there < length) {
-    memcpy(ptr, ts_obj_data + start, length - in_there);
-    in_there = length;
+  // Cover the remaining tail with a partial copy of the filled prefix.
+  if (in_there < fill_length) {
+    memcpy(ptr, ts_obj_data + start, fill_length - in_there);
   }
 }
 
diff --git a/src/util.cc b/src/util.cc
@@ -25,4 +25,33 @@ Utf8Value::Utf8Value(v8::Isolate* isolate, v8::Local<v8::Value> value)
   str_[length_] = '\0';
 }
 
+
+// Copies the string form of `value` into str_ as UTF-16 code units followed
+// by a terminating 0. The inline str_st_ array is used when it is large
+// enough, otherwise a buffer is malloc()ed. If `value` or its string
+// conversion is an empty handle, length_ stays 0 and nothing is written.
+TwoByteValue::TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value)
+    : length_(0), str_(str_st_) {
+  if (value.IsEmpty())
+    return;
+
+  v8::Local<v8::String> string = value->ToString(isolate);
+  if (string.IsEmpty())
+    return;
+
+  // Size the storage in uint16_t code units, the unit String::Write() counts
+  // in, and include one extra slot for the terminator. A byte count here
+  // would leave the two byte terminator overrunning a heap buffer.
+  const size_t len = string->Length() + 1;
+  if (len > sizeof(str_st_) / sizeof(*str_st_)) {
+    str_ = static_cast<uint16_t*>(malloc(len * sizeof(*str_)));
+    CHECK_NE(str_, nullptr);
+  }
+
+  const int flags =
+      v8::String::NO_NULL_TERMINATION | v8::String::REPLACE_INVALID_UTF8;
+  length_ = string->Write(str_, 0, len, flags);
+  str_[length_] = '\0';
+}
+
 }  // namespace node
diff --git a/src/util.h b/src/util.h
@@ -205,6 +205,43 @@ class Utf8Value {
     char str_st_[1024];
 };
 
+// Holds the UTF-16 code units of a JS value's string form for the lifetime
+// of the object. str_ points either at the inline str_st_ array or at a
+// separately allocated buffer, which the destructor releases with free().
+class TwoByteValue {
+  public:
+    explicit TwoByteValue(v8::Isolate* isolate, v8::Local<v8::Value> value);
+
+    ~TwoByteValue() {
+      if (str_ != str_st_)
+        free(str_);
+    }
+
+    // Not copyable: a memberwise copy would alias str_, leading to a double
+    // free() for heap storage or a pointer into the source's str_st_.
+    TwoByteValue(const TwoByteValue&) = delete;
+    TwoByteValue& operator=(const TwoByteValue&) = delete;
+
+    // Pointer to the first stored code unit.
+    uint16_t* operator*() {
+      return str_;
+    }
+
+    const uint16_t* operator*() const {
+      return str_;
+    }
+
+    // Number of uint16_t code units stored, as set by the constructor.
+    size_t length() const {
+      return length_;
+    }
+
+  private:
+    size_t length_;
+    uint16_t* str_;
+    uint16_t str_st_[1024];
+};
+
 }  // namespace node
 
 #endif  // SRC_UTIL_H_
diff --git a/test/parallel/test-buffer-fill.js b/test/parallel/test-buffer-fill.js