Skip to content

Commit d573ea1

Browse files
anonrig, kvakil, and lemire
committed
buffer: add SIMD Neon optimization for byteLength
Co-authored-by: Keyhan Vakil <[email protected]> Co-authored-by: Daniel Lemire <[email protected]>
1 parent c9ec72d commit d573ea1

File tree

4 files changed

+86
-8
lines changed

4 files changed

+86
-8
lines changed

node.gyp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
'src/node_report_utils.cc',
122122
'src/node_sea.cc',
123123
'src/node_serdes.cc',
124+
'src/node_simd.cc',
124125
'src/node_shadow_realm.cc',
125126
'src/node_snapshotable.cc',
126127
'src/node_sockaddr.cc',

src/node_buffer.cc

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "node_external_reference.h"
2727
#include "node_i18n.h"
2828
#include "node_internals.h"
29+
#include "node_simd.h"
2930

3031
#include "env-inl.h"
3132
#include "simdutf.h"
@@ -743,14 +744,8 @@ void SlowByteLengthUtf8(const FunctionCallbackInfo<Value>& args) {
743744

744745
// Fast-call variant of the UTF-8 byte-length computation for a
// v8::FastOneByteString. `receiver` is not used by the body shown here.
uint32_t FastByteLengthUtf8(Local<Value> receiver,
745746
const v8::FastOneByteString& source) {
746-
uint32_t result = 0;
747-
uint32_t length = source.length;
748-
const uint8_t* data = reinterpret_cast<const uint8_t*>(source.data);
749-
for (uint32_t i = 0; i < length; ++i) {
750-
result += (data[i] >> 7);
751-
}
752-
result += length;
753-
return result;
747+
// Hand the string's bytes and length to the shared helper, which replaces
// the inline scalar loop removed above.
return node::simd::utf8_byte_length(
748+
reinterpret_cast<const uint8_t*>(source.data), source.length);
754749
}
755750

756751
static v8::CFunction fast_byte_length_utf8(

src/node_simd.cc

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#include "node_simd.h"
2+
3+
#include <string_view>
4+
5+
#if NODE_HAS_SIMD_NEON
6+
#include <arm_neon.h>
7+
#endif
8+
9+
namespace node {
10+
namespace simd {
11+
12+
#if NODE_HAS_SIMD_NEON
13+
// Returns `length` plus the number of bytes in [data, data + length) whose
// most significant bit is set, i.e. every byte >= 0x80 is counted twice.
// The caller in node_buffer.cc passes the bytes of a one-byte V8 string, for
// which this is the size of the string once encoded as UTF-8.
//
// The 64-bit total is narrowed to uint32_t on return.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  constexpr size_t kLanes = sizeof(uint8x16_t);
  // Every vsraq_n_u8(acc, chunk, 7) adds 0 or 1 to each 8-bit lane, so after
  // kBatch accumulations no lane exceeds 16 and the lanes cannot overflow.
  constexpr size_t kBatch = 16;
  constexpr size_t kBatchBytes = kLanes * kBatch;

  uint64_t high_bits = 0;

  // Main loop: whole batches of kBatchBytes bytes.
  const uint8_t* const batch_end = data + (length / kBatchBytes) * kBatchBytes;
  while (data < batch_end) {
    uint8x16_t acc = vdupq_n_u8(0);
    for (size_t i = 0; i < kBatch; ++i, data += kLanes) {
      acc = vsraq_n_u8(acc, vld1q_u8(data), 7);
    }
    // The horizontal sum can reach 16 lanes * 16 = 256, which does not fit in
    // the uint8_t returned by vaddvq_u8 (it would wrap to 0). Use the widening
    // reduction, which returns uint16_t.
    high_bits += vaddlvq_u8(acc);
  }

  // Remaining whole vectors: fewer than kBatch of them, so lanes stay < 16.
  const size_t after_batches = length % kBatchBytes;
  const uint8_t* const simd_end = data + (after_batches / kLanes) * kLanes;
  uint8x16_t acc = vdupq_n_u8(0);
  for (; data < simd_end; data += kLanes) {
    acc = vsraq_n_u8(acc, vld1q_u8(data), 7);
  }
  high_bits += vaddlvq_u8(acc);

  // Trailing bytes that do not fill a vector.
  const uint8_t* const scalar_end = data + (length % kLanes);
  for (; data < scalar_end; ++data) {
    high_bits += *data >> 7;
  }

  return static_cast<uint32_t>(high_bits + length);
}
48+
#else
49+
// Portable fallback: returns `length` plus the number of bytes in
// [data, data + length) whose most significant bit is set, so every byte
// >= 0x80 is counted twice. The total is narrowed to uint32_t on return.
uint32_t utf8_byte_length(const uint8_t* data, size_t length) {
  size_t high_bit_count = 0;
  // Index with size_t: a 32-bit index would wrap around and never reach
  // `length` for inputs of 2^32 bytes or more.
  for (size_t i = 0; i < length; ++i) {
    high_bit_count += data[i] >> 7;
  }
  return static_cast<uint32_t>(length + high_bit_count);
}
57+
#endif
58+
59+
} // namespace simd
60+
} // namespace node

src/node_simd.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
#ifndef SRC_NODE_SIMD_H_
#define SRC_NODE_SIMD_H_

// NODE_HAS_SIMD_NEON is defined to 1 when the compiler targets 64-bit ARM
// (__aarch64__ or _M_ARM64); it is left undefined on every other target.
#if defined(__aarch64__) || defined(_M_ARM64)
#define NODE_HAS_SIMD_NEON 1
#endif

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#include <cstddef>  // size_t
#include <cstdint>  // uint8_t, uint32_t
#include <string_view>

namespace node {
namespace simd {

// Returns `length` plus the number of bytes in [input, input + length) whose
// most significant bit is set, i.e. every byte >= 0x80 counts as two bytes.
uint32_t utf8_byte_length(const uint8_t* input, size_t length);

}  // namespace simd
}  // namespace node

#endif  // defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#endif  // SRC_NODE_SIMD_H_

0 commit comments

Comments
 (0)