Skip to content

Commit f977630

Browse files
authored
Merge pull request #1470 from RReverser/even-faster-strings
Speed up passing ASCII-only strings to WASM
2 parents 098b67d + 15defcf commit f977630

File tree

6 files changed

+102
-19
lines changed

6 files changed

+102
-19
lines changed

crates/cli-support/src/js/mod.rs

Lines changed: 49 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1445,18 +1445,48 @@ impl<'a> Context<'a> {
14451445
self.expose_text_encoder();
14461446
self.expose_uint8_memory();
14471447

1448+
// A fast path that directly writes char codes into WASM memory as long
1449+
// as it finds only ASCII characters.
1450+
//
1451+
// This is much faster for common ASCII strings because it can avoid
1452+
// calling out into C++ TextEncoder code.
1453+
//
1454+
// This might not be very intuitive, but such calls are usually more
1455+
// expensive in mainstream engines than staying in JS, and
1456+
// charCodeAt on ASCII strings is usually optimised to raw bytes.
1457+
let start_encoding_as_ascii = format!(
1458+
"
1459+
{}
1460+
let size = arg.length;
1461+
let ptr = wasm.__wbindgen_malloc(size);
1462+
let offset = 0;
1463+
{{
1464+
const mem = getUint8Memory();
1465+
for (; offset < arg.length; offset++) {{
1466+
const code = arg.charCodeAt(offset);
1467+
if (code > 0x7F) break;
1468+
mem[ptr + offset] = code;
1469+
}}
1470+
}}
1471+
",
1472+
debug
1473+
);
1474+
14481475
// The first implementation we have for this is to use
14491476
// `TextEncoder#encode` which has been around for quite some time.
14501477
let use_encode = format!(
14511478
"
14521479
{}
1453-
const buf = cachedTextEncoder.encode(arg);
1454-
const ptr = wasm.__wbindgen_malloc(buf.length);
1455-
getUint8Memory().set(buf, ptr);
1456-
WASM_VECTOR_LEN = buf.length;
1480+
if (offset !== arg.length) {{
1481+
const buf = cachedTextEncoder.encode(arg.slice(offset));
1482+
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + buf.length);
1483+
getUint8Memory().set(buf, ptr + offset);
1484+
offset += buf.length;
1485+
}}
1486+
WASM_VECTOR_LEN = offset;
14571487
return ptr;
14581488
",
1459-
debug
1489+
start_encoding_as_ascii
14601490
);
14611491

14621492
// Another possibility is to use `TextEncoder#encodeInto` which is much
@@ -1465,23 +1495,23 @@ impl<'a> Context<'a> {
14651495
let use_encode_into = format!(
14661496
"
14671497
{}
1468-
let size = arg.length;
1469-
let ptr = wasm.__wbindgen_malloc(size);
1470-
let writeOffset = 0;
1471-
while (true) {{
1472-
const view = getUint8Memory().subarray(ptr + writeOffset, ptr + size);
1473-
const {{ read, written }} = cachedTextEncoder.encodeInto(arg, view);
1474-
writeOffset += written;
1475-
if (read === arg.length) {{
1476-
break;
1477-
}}
1478-
arg = arg.substring(read);
1479-
ptr = wasm.__wbindgen_realloc(ptr, size, size += arg.length * 3);
1498+
if (offset !== arg.length) {{
1499+
arg = arg.slice(offset);
1500+
ptr = wasm.__wbindgen_realloc(ptr, size, size = offset + arg.length * 3);
1501+
const view = getUint8Memory().subarray(ptr + offset, ptr + size);
1502+
const ret = cachedTextEncoder.encodeInto(arg, view);
1503+
{}
1504+
offset += ret.written;
14801505
}}
1481-
WASM_VECTOR_LEN = writeOffset;
1506+
WASM_VECTOR_LEN = offset;
14821507
return ptr;
14831508
",
1484-
debug
1509+
start_encoding_as_ascii,
1510+
if self.config.debug {
1511+
"if (ret.read != arg.length) throw new Error('failed to pass whole string');"
1512+
} else {
1513+
""
1514+
},
14851515
);
14861516

14871517
// Looks like `encodeInto` doesn't currently work when the memory passed

tests/headless/main.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,3 +50,4 @@ pub fn import_export_same_name() {
5050
pub mod snippets;
5151
pub mod modules;
5252
pub mod anyref_heap_live_count;
53+
pub mod strings;

tests/headless/strings.js

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
export function test_string_roundtrip(f) {
2+
const test = expected => {
3+
const actual = f(expected);
4+
if (actual === expected)
5+
return;
6+
throw new Error(`string roundtrip "${actual}" != "${expected}"`);
7+
};
8+
9+
test('');
10+
test('a');
11+
test('💖');
12+
13+
test('a longer string');
14+
test('a longer 💖 string');
15+
}

tests/headless/strings.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
use wasm_bindgen::prelude::*;
2+
use wasm_bindgen_test::*;
3+
4+
#[wasm_bindgen(module = "/tests/headless/strings.js")]
5+
extern "C" {
6+
fn test_string_roundtrip(c: &Closure<dyn Fn(String) -> String>);
7+
}
8+
9+
#[wasm_bindgen_test]
10+
fn string_roundtrip() {
11+
test_string_roundtrip(&Closure::wrap(Box::new(|s| s)));
12+
}

tests/wasm/simple.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,3 +92,16 @@ exports.RenamedInRust = class {};
9292
exports.new_renamed = () => new exports.RenamedInRust;
9393

9494
exports.import_export_same_name = () => {};
95+
96+
exports.test_string_roundtrip = () => {
97+
const test = s => {
98+
assert.strictEqual(wasm.do_string_roundtrip(s), s);
99+
};
100+
101+
test('');
102+
test('a');
103+
test('💖');
104+
105+
test('a longer string');
106+
test('a longer 💖 string');
107+
};

tests/wasm/simple.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@ extern "C" {
2727
#[wasm_bindgen(js_name = RenamedInRust)]
2828
type Renamed;
2929
fn new_renamed() -> Renamed;
30+
31+
fn test_string_roundtrip();
3032
}
3133

3234
#[wasm_bindgen_test]
@@ -201,3 +203,13 @@ fn renaming_imports_and_instanceof() {
201203
pub fn import_export_same_name() {
202204
js_import_export_same_name();
203205
}
206+
207+
#[wasm_bindgen_test]
208+
fn string_roundtrip() {
209+
test_string_roundtrip();
210+
}
211+
212+
#[wasm_bindgen]
213+
pub fn do_string_roundtrip(s: String) -> String {
214+
s
215+
}

0 commit comments

Comments
 (0)