Skip to content

Commit 8877b5f

Browse files
addaleax authored and BridgeAR committed
lib: add ASCII fast path to getStringWidth()
A lot of strings that are going to be passed to `getStringWidth()` are
ASCII strings, for which the calculation is rather easy and calling into
C++ can be skipped.

                                                  confidence improvement accuracy (*)    (**)   (***)
misc/getstringwidth.js n=100000 type='ascii'             ***    328.99 %      ±21.73% ±29.25% ±38.77%
misc/getstringwidth.js n=100000 type='emojiseq'                   2.94 %       ±7.66% ±10.19% ±13.26%
misc/getstringwidth.js n=100000 type='fullwidth'                  4.70 %       ±5.64%  ±7.50%  ±9.76%

PR-URL: #29301
Reviewed-By: Gus Caplan <[email protected]>
Reviewed-By: Trivikram Kamat <[email protected]>
Reviewed-By: Ben Noordhuis <[email protected]>
Reviewed-By: Colin Ihrig <[email protected]>
Reviewed-By: James M Snell <[email protected]>
Reviewed-By: Luigi Pinca <[email protected]>
Reviewed-By: Minwoo Jung <[email protected]>
Reviewed-By: Rich Trott <[email protected]>
1 parent 1f40413 commit 8877b5f

File tree

3 files changed: +74 additions, -7 deletions

benchmark/misc/getstringwidth.js

Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
'use strict';
2+
3+
const common = require('../common.js');
4+
5+
const bench = common.createBenchmark(main, {
6+
type: ['ascii', 'mixed', 'emojiseq', 'fullwidth'],
7+
n: [10e4]
8+
}, {
9+
flags: ['--expose-internals']
10+
});
11+
12+
function main({ n, type }) {
13+
const { getStringWidth } = require('internal/readline/utils');
14+
15+
const str = ({
16+
ascii: 'foobar'.repeat(100),
17+
mixed: 'foo'.repeat(100) + '😀' + 'bar'.repeat(100),
18+
emojiseq: '👨‍👨‍👧‍👦👨‍👩‍👦‍👦👨‍👩‍👧‍👧👩‍👩‍👧‍👦'.repeat(10),
19+
fullwidth: '你好'.repeat(150)
20+
})[type];
21+
22+
bench.start();
23+
for (let j = 0; j < n; j += 1)
24+
getStringWidth(str);
25+
bench.end(n);
26+
}

lib/internal/readline/utils.js

Lines changed: 26 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -34,13 +34,32 @@ if (internalBinding('config').hasIntl) {
3434
const icu = internalBinding('icu');
3535
getStringWidth = function getStringWidth(str, options) {
3636
options = options || {};
37-
if (!Number.isInteger(str))
38-
str = stripVTControlCharacters(String(str));
39-
return icu.getStringWidth(
40-
str,
41-
Boolean(options.ambiguousAsFullWidth),
42-
Boolean(options.expandEmojiSequence)
43-
);
37+
if (Number.isInteger(str)) {
38+
// Provide information about the character with code point 'str'.
39+
return icu.getStringWidth(
40+
str,
41+
Boolean(options.ambiguousAsFullWidth),
42+
false
43+
);
44+
}
45+
str = stripVTControlCharacters(String(str));
46+
let width = 0;
47+
for (let i = 0; i < str.length; i++) {
48+
// Try to avoid calling into C++ by first handling the ASCII portion of
49+
// the string. If it is fully ASCII, we skip the C++ part.
50+
const code = str.charCodeAt(i);
51+
if (code < 127) {
52+
width += code >= 32;
53+
continue;
54+
}
55+
width += icu.getStringWidth(
56+
str.slice(i),
57+
Boolean(options.ambiguousAsFullWidth),
58+
Boolean(options.expandEmojiSequence)
59+
);
60+
break;
61+
}
62+
return width;
4463
};
4564
isFullWidthCodePoint =
4665
function isFullWidthCodePoint(code, options) {

test/parallel/test-icu-stringwidth.js

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,3 +69,25 @@ assert.strictEqual(
6969

7070
// Control chars and combining chars are zero
7171
assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);
72+
73+
// Test that the fast path for ASCII characters yields results consistent
74+
// with the 'slow' path.
75+
for (const ambiguousAsFullWidth of [ false, true ]) {
76+
for (let i = 0; i < 256; i++) {
77+
const char = String.fromCharCode(i);
78+
assert.strictEqual(
79+
readline.getStringWidth(i, { ambiguousAsFullWidth }),
80+
readline.getStringWidth(char, { ambiguousAsFullWidth }));
81+
assert.strictEqual(
82+
readline.getStringWidth(char + '🎉', { ambiguousAsFullWidth }),
83+
readline.getStringWidth(char, { ambiguousAsFullWidth }) + 2);
84+
85+
if (i < 32 || (i >= 127 && i < 160)) { // Control character
86+
assert.strictEqual(
87+
readline.getStringWidth(i, { ambiguousAsFullWidth }), 0);
88+
} else if (i < 127) { // Regular ASCII character
89+
assert.strictEqual(
90+
readline.getStringWidth(i, { ambiguousAsFullWidth }), 1);
91+
}
92+
}
93+
}

0 commit comments

Comments
 (0)