Skip to content

Commit 131275a

Browse files
committed
buffer: add SIMD Neon optimization for byteLength
1 parent c9ec72d commit 131275a

File tree

1 file changed

+40
-0
lines changed

1 file changed

+40
-0
lines changed

src/node_buffer.cc

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@
3838
#include <cstring>
3939
#include <climits>
4040

41+
#if defined(__aarch64__) || defined(_M_ARM64)
42+
#define NODE_HAS_SIMD_NEON 1
43+
#endif
44+
45+
#if NODE_HAS_SIMD_NEON
46+
#include <arm_neon.h>
47+
#endif
48+
4149
#define THROW_AND_RETURN_UNLESS_BUFFER(env, obj) \
4250
THROW_AND_RETURN_IF_NOT_BUFFER(env, obj, "argument") \
4351

@@ -741,6 +749,37 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
741749
args.GetReturnValue().Set(args[0].As<String>()->Utf8Length(env->isolate()));
742750
}
743751

752+
#if NODE_HAS_SIMD_NEON
753+
// NEON implementation of the fast-path UTF-8 byte length for a one-byte
// string.
//
// The result is `source.length` plus the number of input bytes whose high bit
// (0x80) is set: each such byte needs one extra byte once encoded as UTF-8.
//
// `receiver` is not used by this function.
//
// The vector loop only consumes whole 16-byte blocks, so it never loads past
// `source.data + source.length`; the remaining 0..15 bytes are handled by the
// scalar tail loop.
uint32_t FastByteLengthUtf8(Local<Value> receiver,
                            const v8::FastOneByteString& source) {
  constexpr size_t kLanes = sizeof(uint8x16_t);  // 16 bytes per vector

  const auto* data = reinterpret_cast<const uint8_t*>(source.data);
  const size_t length = source.length;
  // Largest multiple of kLanes not exceeding length.
  const size_t simd_length = length - (length % kLanes);

  uint32_t answer = 0;
  size_t i = 0;

  for (; i < simd_length; i += kLanes) {
    // Load 16 bytes from data.
    const uint8x16_t values = vld1q_u8(data + i);

    // Move each byte's high bit down to bit 0, so every lane is exactly 0 or
    // 1. (A compare such as vcgeq_u8 would produce 0xFF per matching lane,
    // which cannot be summed directly as a count.)
    const uint8x16_t high_bits = vshrq_n_u8(values, 7);

    // Widening horizontal add of the 16 lanes (at most 16 per block), folded
    // into the scalar total so no 8-bit lane can overflow or saturate.
    answer += vaddlvq_u8(high_bits);
  }

  // Scalar tail for the bytes that do not fill a whole vector.
  for (; i < length; ++i) {
    answer += (data[i] >> 7);
  }

  return answer + source.length;
}
782+
#else
744783
uint32_t FastByteLengthUtf8(Local<Value> receiver,
745784
const v8::FastOneByteString& source) {
746785
uint32_t result = 0;
@@ -752,6 +791,7 @@ uint32_t FastByteLengthUtf8(Local<Value> receiver,
752791
result += length;
753792
return result;
754793
}
794+
#endif
755795

756796
// File-local v8::CFunction built from FastByteLengthUtf8 via
// v8::CFunction::Make.
// NOTE(review): presumably registered with V8 as the fast-call counterpart of
// SlowByteLengthUtf8 — the registration site is outside this view; confirm.
static v8::CFunction fast_byte_length_utf8(
757797
v8::CFunction::Make(FastByteLengthUtf8));

0 commit comments

Comments
 (0)