From 918159e0f6601a6314cc7aa0eb30a170a349ff8c Mon Sep 17 00:00:00 2001 From: Brian White Date: Wed, 4 May 2016 11:33:31 -0400 Subject: [PATCH 1/5] http: optimize checkIsHttpToken() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit both makes checkIsHttpToken() inlinable and extracts the character checking logic to a separate inlinable function so that the main loop can be unrolled a bit. PR-URL: https://github.com/nodejs/node/pull/6570 Reviewed-By: James M Snell Reviewed-By: Ben Noordhuis Reviewed-By: Сковорода Никита Андреевич --- lib/_http_common.js | 85 ++++++++++++++++++++++++++------------------- 1 file changed, 49 insertions(+), 36 deletions(-) diff --git a/lib/_http_common.js b/lib/_http_common.js index 583c5d610a5ed9..150d4242617aab 100644 --- a/lib/_http_common.js +++ b/lib/_http_common.js @@ -232,52 +232,65 @@ exports.httpSocketSetup = httpSocketSetup; * per the rules defined in RFC 7230 * See https://tools.ietf.org/html/rfc7230#section-3.2.6 * + * Allowed characters in an HTTP token: + * ^_`a-z 94-122 + * A-Z 65-90 + * - 45 + * 0-9 48-57 + * ! 33 + * #$%&' 35-39 + * *+ 42-43 + * . 46 + * | 124 + * ~ 126 + * * This implementation of checkIsHttpToken() loops over the string instead of * using a regular expression since the former is up to 180% faster with v8 4.9 * depending on the string length (the shorter the string, the larger the * performance difference) + * + * Additionally, checkIsHttpToken() is currently designed to be inlinable by v8, + * so take care when making changes to the implementation so that the source + * code size does not exceed v8's default max_inlined_source_size setting. **/ +function isValidTokenChar(ch) { + if (ch >= 94 && ch <= 122) + return true; + if (ch >= 65 && ch <= 90) + return true; + if (ch === 45) + return true; + if (ch >= 48 && ch <= 57) + return true; + if (ch === 34 || ch === 40 || ch === 41 || ch === 44) + return false; + if (ch >= 33 && ch <= 46) + return true; + if (ch === 124 || ch === 126) + return true; + return false; +} function checkIsHttpToken(val) { if (typeof val !== 'string' || val.length === 0) return false; - - for (var i = 0, len = val.length; i < len; i++) { - var ch = val.charCodeAt(i); - - if (ch >= 65 && ch <= 90) // A-Z - continue; - - if (ch >= 97 && ch <= 122) // a-z - continue; - - // ^ => 94 - // _ => 95 - // ` => 96 - // | => 124 - // ~ => 126 - if (ch === 94 || ch === 95 || ch === 96 || ch === 124 || ch === 126) - continue; - - if (ch >= 48 && ch <= 57) // 0-9 - continue; - - // ! => 33 - // # => 35 - // $ => 36 - // % => 37 - // & => 38 - // ' => 39 - // * => 42 - // + => 43 - // - => 45 - // . => 46 - if (ch >= 33 && ch <= 46) { - if (ch === 34 || ch === 40 || ch === 41 || ch === 44) + if (!isValidTokenChar(val.charCodeAt(0))) + return false; + const len = val.length; + if (len > 1) { + if (!isValidTokenChar(val.charCodeAt(1))) + return false; + if (len > 2) { + if (!isValidTokenChar(val.charCodeAt(2))) return false; - continue; + if (len > 3) { + if (!isValidTokenChar(val.charCodeAt(3))) + return false; + for (var i = 4; i < len; i++) { + if (!isValidTokenChar(val.charCodeAt(i))) + return false; + } + } } - - return false; } return true; } From 83432bfff1e21797e497aacf4c473db1345f6334 Mon Sep 17 00:00:00 2001 From: Brian White Date: Sat, 7 May 2016 17:03:35 -0400 Subject: [PATCH 2/5] http: optimize checkInvalidHeaderChar() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit optimizes checkInvalidHeaderChar() by unrolling the character checking loop a bit. Additionally, some changes to the benchmark runner are needed in order for the included benchmark to be run correctly. Specifically, the regexp used to parse `key=value` parameters contained a greedy quantifier that was causing the `key` to match part of the `value` if `value` contained an equals sign. PR-URL: https://github.com/nodejs/node/pull/6570 Reviewed-By: James M Snell Reviewed-By: Ben Noordhuis Reviewed-By: Сковорода Никита Андреевич --- benchmark/common.js | 8 ++-- benchmark/http/check_invalid_header_char.js | 42 +++++++++++++++++++++ lib/_http_common.js | 29 +++++++++++--- 3 files changed, 69 insertions(+), 10 deletions(-) create mode 100644 benchmark/http/check_invalid_header_char.js diff --git a/benchmark/common.js b/benchmark/common.js index 71b93d038aeb23..d937e13d93fbf7 100644 --- a/benchmark/common.js +++ b/benchmark/common.js @@ -191,7 +191,7 @@ function parseOpts(options) { var num = keys.length; var conf = {}; for (var i = 2; i < process.argv.length; i++) { - var match = process.argv[i].match(/^(.+)=(.*)$/); + var match = process.argv[i].match(/^(.+?)=([\s\S]*)$/); if (!match || !match[1] || !options[match[1]]) { return null; } else { @@ -238,8 +238,6 @@ Benchmark.prototype.report = function(value) { console.log('%s: %s', heading, value.toFixed(5)); else if (outputFormat == 'csv') console.log('%s,%s', heading, value.toFixed(5)); - - process.exit(0); }; Benchmark.prototype.getHeading = function() { @@ -247,11 +245,11 @@ Benchmark.prototype.getHeading = function() { if (outputFormat == 'default') { return this._name + ' ' + Object.keys(conf).map(function(key) { - return key + '=' + conf[key]; + return key + '=' + JSON.stringify('' + conf[key]); }).join(' '); } else if (outputFormat == 'csv') { return this._name + ',' + Object.keys(conf).map(function(key) { - return conf[key]; + return JSON.stringify('' + conf[key]); }).join(','); } }; diff --git a/benchmark/http/check_invalid_header_char.js b/benchmark/http/check_invalid_header_char.js new file mode 100644 index 00000000000000..bef44e63167078 --- /dev/null +++ b/benchmark/http/check_invalid_header_char.js @@ -0,0 +1,42 @@ +'use strict'; + +const common = require('../common.js'); +const _checkInvalidHeaderChar = require('_http_common')._checkInvalidHeaderChar; + +const bench = common.createBenchmark(main, { + key: [ + // Valid + '', + '1', + '\t\t\t\t\t\t\t\t\t\tFoo bar baz', + 'keep-alive', + 'close', + 'gzip', + '20091', + 'private', + 'text/html; charset=utf-8', + 'text/plain', + 'Sat, 07 May 2016 16:54:48 GMT', + 'SAMEORIGIN', + 'en-US', + + // Invalid + 'Here is a value that is really a folded header value\r\n this should be \ + supported, but it is not currently', + '中文呢', // unicode + 'foo\nbar', + '\x7F' + ], + n: [5e8], +}); + +function main(conf) { + var n = +conf.n; + var key = conf.key; + + bench.start(); + for (var i = 0; i < n; i++) { + _checkInvalidHeaderChar(key); + } + bench.end(n); +} diff --git a/lib/_http_common.js b/lib/_http_common.js index 150d4242617aab..ad0389ba2138ae 100644 --- a/lib/_http_common.js +++ b/lib/_http_common.js @@ -301,13 +301,32 @@ exports._checkIsHttpToken = checkIsHttpToken; * field-value = *( field-content / obs-fold ) * field-content = field-vchar [ 1*( SP / HTAB ) field-vchar ] * field-vchar = VCHAR / obs-text + * + * checkInvalidHeaderChar() is currently designed to be inlinable by v8, + * so take care when making changes to the implementation so that the source + * code size does not exceed v8's default max_inlined_source_size setting. **/ function checkInvalidHeaderChar(val) { - val = '' + val; - for (var i = 0; i < val.length; i++) { - const ch = val.charCodeAt(i); - if (ch === 9) continue; - if (ch <= 31 || ch > 255 || ch === 127) return true; + val += ''; + if (val.length < 1) + return false; + var c = val.charCodeAt(0); + if ((c <= 31 && c !== 9) || c > 255 || c === 127) + return true; + if (val.length < 2) + return false; + c = val.charCodeAt(1); + if ((c <= 31 && c !== 9) || c > 255 || c === 127) + return true; + if (val.length < 3) + return false; + c = val.charCodeAt(2); + if ((c <= 31 && c !== 9) || c > 255 || c === 127) + return true; + for (var i = 3; i < val.length; ++i) { + c = val.charCodeAt(i); + if ((c <= 31 && c !== 9) || c > 255 || c === 127) + return true; } return false; } From 39fdf0773d7066d33f562de8bb7220708f4ab619 Mon Sep 17 00:00:00 2001 From: Brian White Date: Sat, 7 May 2016 17:20:23 -0400 Subject: [PATCH 3/5] benchmark: increase http token check iterations PR-URL: https://github.com/nodejs/node/pull/6570 Reviewed-By: James M Snell Reviewed-By: Ben Noordhuis --- benchmark/http/check_is_http_token.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benchmark/http/check_is_http_token.js b/benchmark/http/check_is_http_token.js index 9e0b8279b58ed0..a317e05a4a12d2 100644 --- a/benchmark/http/check_is_http_token.js +++ b/benchmark/http/check_is_http_token.js @@ -37,7 +37,7 @@ const bench = common.createBenchmark(main, { ':alternate-protocol', // fast bailout 'alternate-protocol:' // slow bailout ], - n: [1e6], + n: [5e8], }); function main(conf) { From c8fa79f35128388496b42e41adcec486797f6122 Mon Sep 17 00:00:00 2001 From: Brian White Date: Sat, 7 May 2016 17:21:22 -0400 Subject: [PATCH 4/5] test: add more http token/value checking tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR-URL: https://github.com/nodejs/node/pull/6570 Reviewed-By: James M Snell Reviewed-By: Ben Noordhuis Reviewed-By: Сковорода Никита Андреевич --- .../parallel/test-http-invalidheaderfield2.js | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 test/parallel/test-http-invalidheaderfield2.js diff --git a/test/parallel/test-http-invalidheaderfield2.js b/test/parallel/test-http-invalidheaderfield2.js new file mode 100644 index 00000000000000..727c600d5d79a3 --- /dev/null +++ b/test/parallel/test-http-invalidheaderfield2.js @@ -0,0 +1,97 @@ +'use strict'; +require('../common'); +const assert = require('assert'); +const inspect = require('util').inspect; +const checkIsHttpToken = require('_http_common')._checkIsHttpToken; +const checkInvalidHeaderChar = require('_http_common')._checkInvalidHeaderChar; + +// Good header field names +[ + 'TCN', + 'ETag', + 'date', + 'alt-svc', + 'Content-Type', + '0', + 'Set-Cookie2', + 'Set_Cookie', + 'foo`bar^', + 'foo|bar', + '~foobar', + 'FooBar!', + '#Foo', + '$et-Cookie', + '%%Test%%', + 'Test&123', + 'It\'s_fun', + '2*3', + '4+2', + '3.14159265359' +].forEach(function(str) { + assert.strictEqual(checkIsHttpToken(str), + true, + 'checkIsHttpToken(' + + inspect(str) + + ') unexpectedly failed'); +}); +// Bad header field names +[ + ':', + '@@', + '中文呢', // unicode + '((((())))', + ':alternate-protocol', + 'alternate-protocol:', + 'foo\nbar', + 'foo\rbar', + 'foo\r\nbar', + 'foo\x00bar', + '\x7FMe!', + '{Start', + '(Start', + '[Start', + 'End}', + 'End)', + 'End]', + '"Quote"', + 'This,That' +].forEach(function(str) { + assert.strictEqual(checkIsHttpToken(str), + false, + 'checkIsHttpToken(' + + inspect(str) + + ') unexpectedly succeeded'); +}); + + +// Good header field values +[ + 'foo bar', + 'foo\tbar', + '0123456789ABCdef', + '!@#$%^&*()-_=+\\;\':"[]{}<>,./?|~`' +].forEach(function(str) { + assert.strictEqual(checkInvalidHeaderChar(str), + false, + 'checkInvalidHeaderChar(' + + inspect(str) + + ') unexpectedly failed'); +}); + +// Bad header field values +[ + 'foo\rbar', + 'foo\nbar', + 'foo\r\nbar', + '中文呢', // unicode + '\x7FMe!', + 'Testing 123\x00', + 'foo\vbar', + 'Ding!\x07' +].forEach(function(str) { + assert.strictEqual(checkInvalidHeaderChar(str), + true, + 'checkInvalidHeaderChar(' + + inspect(str) + + ') unexpectedly succeeded'); +}); From c570182a39ada502d0f65223728b0cd9e136cda0 Mon Sep 17 00:00:00 2001 From: Brian White Date: Sat, 28 May 2016 00:56:28 -0400 Subject: [PATCH 5/5] benchmark: don't convert arguments to numbers PR-URL: https://github.com/nodejs/node/pull/6570 Reviewed-By: James M Snell Reviewed-By: Ben Noordhuis --- benchmark/common.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/benchmark/common.js b/benchmark/common.js index d937e13d93fbf7..d49aef106f1c11 100644 --- a/benchmark/common.js +++ b/benchmark/common.js @@ -195,9 +195,7 @@ function parseOpts(options) { if (!match || !match[1] || !options[match[1]]) { return null; } else { - conf[match[1]] = (match[2].length && isFinite(match[2]) - ? +match[2] - : match[2]); + conf[match[1]] = match[2]; num--; } }