diff --git a/Makefile b/Makefile index c22e5ea..e1b1aca 100755 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ lib/lexer.wasm: include-wasm/cjs-module-lexer.h src/lexer.c @mkdir -p lib ../wasi-sdk-11.0/bin/clang src/lexer.c -I include-wasm --sysroot=../wasi-sdk-11.0/share/wasi-sysroot -o lib/lexer.wasm -nostartfiles \ -Wl,-z,stack-size=13312,--no-entry,--compress-relocations,--strip-all,--export=__heap_base,\ - --export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ree \ + --export=parseCJS,--export=sa,--export=e,--export=re,--export=es,--export=ee,--export=rre,--export=ree,--export=res,--export=ru,--export=us,--export=ue \ -Wno-logical-op-parentheses -Wno-parentheses \ -Oz diff --git a/README.md b/README.md index ce754fc..49aa836 100755 --- a/README.md +++ b/README.md @@ -86,7 +86,9 @@ EXPORTS_SPREAD: `...` (IDENTIFIER | REQUIRE) EXPORTS_MEMBER: EXPORTS_DOT_ASSIGN | EXPORTS_LITERAL_COMPUTED_ASSIGN -EXPORTS_DEFINE: `Object` `.` `defineProperty `(` IDENTIFIER_STRING `, {` +EXPORTS_DEFINE: `Object` `.` `defineProperty` `(` EXPORTS_IDENTIFIER `,` IDENTIFIER_STRING + +EXPORTS_DEFINE_VALUE: EXPORTS_DEFINE `, {` (`enumerable: true,`)? ( `value:` | @@ -121,7 +123,9 @@ EXPORT_STAR_LIB: `Object.keys(` IDENTIFIER$1 `).forEach(function (` IDENTIFIER$2 Spacing between tokens is taken to be any ECMA-262 whitespace, ECMA-262 block comment or ECMA-262 line comment. -* The returned export names are taken to be the combination of the `IDENTIFIER` and `IDENTIFIER_STRING` slots for all `EXPORTS_MEMBER`, `EXPORTS_LITERAL` and `EXPORTS_DEFINE` matches. +* The returned export names are taken to be the combination of: + 1. All `IDENTIFIER` and `IDENTIFIER_STRING` slots for `EXPORTS_MEMBER` and `EXPORTS_LITERAL` matches. + 2. The first `IDENTIFIER_STRING` slot for all `EXPORTS_DEFINE_VALUE` matches where that same string is not an `EXPORTS_DEFINE` match that is not also an `EXPORTS_DEFINE_VALUE` match. 
* The reexport specifiers are taken to be the the combination of: 1. The `REQUIRE` matches of the last matched of either `MODULE_EXPORTS_ASSIGN` or `EXPORTS_LITERAL`. 2. All _top-level_ `EXPORT_STAR` `REQUIRE` matches and `EXPORTS_ASSIGN` matches whose `IDENTIFIER` also matches the first `IDENTIFIER` in `EXPORT_STAR_LIB`. @@ -162,6 +166,8 @@ It will in turn underclassify in cases where the identifiers are renamed: })(exports); ``` +#### Getter Exports Parsing + `Object.defineProperty` is detected for specifically value and getter forms returning an identifier or member expression: ```js @@ -188,6 +194,24 @@ Object.defineProperty(exports, 'd', { value: 'd' }); Object.defineProperty(exports, '__esModule', { value: true }); ``` +To avoid matching getters that have side effects, any getter for an export name that does not match the forms above will +opt that export name out of getter matching: + +```js +// DETECTS: NO EXPORTS +Object.defineProperty(exports, 'a', { + value: 'no problem' +}); + +if (false) { + Object.defineProperty(module.exports, 'a', { + get () { + return dynamic(); + } + }) +} +``` + Alternative object definition structures or getter function bodies are not detected: ```js @@ -337,63 +361,63 @@ JS Build: ``` Module load time -> 5ms +> 4ms Cold Run, All Samples test/samples/*.js (3635 KiB) -> 323ms +> 299ms Warm Runs (average of 25 runs) test/samples/angular.js (1410 KiB) -> 14.84ms +> 13.96ms test/samples/angular.min.js (303 KiB) -> 4.8ms +> 4.72ms test/samples/d3.js (553 KiB) -> 7.84ms +> 6.76ms test/samples/d3.min.js (250 KiB) > 4ms test/samples/magic-string.js (34 KiB) -> 0.72ms +> 0.64ms test/samples/magic-string.min.js (20 KiB) -> 0.4ms +> 0ms test/samples/rollup.js (698 KiB) -> 9.32ms +> 8.48ms test/samples/rollup.min.js (367 KiB) -> 6.52ms +> 5.36ms Warm Runs, All Samples (average of 25 runs) test/samples/*.js (3635 KiB) -> 44ms +> 40.28ms ``` Wasm Build: ``` Module load time -> 11ms +> 10ms Cold Run, All Samples test/samples/*.js (3635 KiB) -> 42ms +> 
43ms Warm Runs (average of 25 runs) test/samples/angular.js (1410 KiB) -> 9.92ms +> 9.32ms test/samples/angular.min.js (303 KiB) -> 3.2ms +> 3.16ms test/samples/d3.js (553 KiB) -> 5.2ms +> 5ms test/samples/d3.min.js (250 KiB) -> 2.52ms +> 2.32ms test/samples/magic-string.js (34 KiB) > 0.16ms test/samples/magic-string.min.js (20 KiB) -> 0.04ms +> 0ms test/samples/rollup.js (698 KiB) -> 6.44ms +> 6.28ms test/samples/rollup.min.js (367 KiB) -> 3.96ms +> 3.6ms Warm Runs, All Samples (average of 25 runs) test/samples/*.js (3635 KiB) -> 30.48ms +> 27.76ms ``` ### Wasm Build Steps diff --git a/bench/index.mjs b/bench/index.mjs index 532f8ab..7ef0660 100755 --- a/bench/index.mjs +++ b/bench/index.mjs @@ -31,7 +31,7 @@ Promise.resolve().then(async () => { console.log('Module load time'); { const start = process.hrtime.bigint(); - var { default: parse } = await import('../lexer.js'); + var { parse } = await import('../lexer.js'); console.log(`> ${c.bold.green(Math.round(Number(process.hrtime.bigint() - start) / 1e6) + 'ms')}`); } diff --git a/include-wasm/cjs-module-lexer.h b/include-wasm/cjs-module-lexer.h index 92143bd..1900d92 100755 --- a/include-wasm/cjs-module-lexer.h +++ b/include-wasm/cjs-module-lexer.h @@ -30,6 +30,9 @@ Slice* export_write_head = NULL; Slice* first_reexport = NULL; Slice* reexport_read_head = NULL; Slice* reexport_write_head = NULL; +Slice* first_unsafe_getter = NULL; +Slice* unsafe_getter_read_head = NULL; +Slice* unsafe_getter_write_head = NULL; void* analysis_base; void* analysis_head; @@ -48,6 +51,9 @@ const uint16_t* sa (uint32_t utf16Len) { first_reexport = NULL; reexport_write_head = NULL; reexport_read_head = NULL; + first_unsafe_getter = NULL; + unsafe_getter_write_head = NULL; + unsafe_getter_read_head = NULL; return source; } @@ -72,6 +78,14 @@ uint32_t res () { uint32_t ree () { return reexport_read_head->end - source; } +// getUnsafeGetterStart +uint32_t us () { + return unsafe_getter_read_head->start - source; +} +// getUnsafeGetterEnd 
+uint32_t ue () { + return unsafe_getter_read_head->end - source; +} // readExport bool re () { if (export_read_head == NULL) @@ -92,6 +106,16 @@ bool rre () { return false; return true; } +// readUnsafeGetter +bool ru () { + if (unsafe_getter_read_head == NULL) + unsafe_getter_read_head = first_unsafe_getter; + else + unsafe_getter_read_head = unsafe_getter_read_head->next; + if (unsafe_getter_read_head == NULL) + return false; + return true; +} bool parse (uint32_t point); @@ -119,14 +143,27 @@ void _addReexport (const uint16_t* start, const uint16_t* end) { reexport->end = end; reexport->next = NULL; } +void _addUnsafeGetter (const uint16_t* start, const uint16_t* end) { + Slice* unsafe_getter = (Slice*)(analysis_head); + analysis_head = analysis_head + sizeof(Slice); + if (unsafe_getter_write_head == NULL) + first_unsafe_getter = unsafe_getter; + else + unsafe_getter_write_head->next = unsafe_getter; + unsafe_getter_write_head = unsafe_getter; + unsafe_getter->start = start; + unsafe_getter->end = end; + unsafe_getter->next = NULL; +} void _clearReexports () { reexport_write_head = NULL; first_reexport = NULL; } void (*addExport)(const uint16_t*, const uint16_t*) = &_addExport; void (*addReexport)(const uint16_t*, const uint16_t*) = &_addReexport; +void (*addUnsafeGetter)(const uint16_t*, const uint16_t*) = &_addUnsafeGetter; void (*clearReexports)() = &_clearReexports; -bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end), void (*clearReexports)()); +bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t* start, const uint16_t* end), void (*addReexport)(const uint16_t* start, const uint16_t* end), void (*addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*clearReexports)()); enum RequireType { Import, diff --git a/include/cjs-module-lexer.h b/include/cjs-module-lexer.h index b9be2f3..17fa4a6 100755 
--- a/include/cjs-module-lexer.h +++ b/include/cjs-module-lexer.h @@ -27,7 +27,7 @@ typedef struct StarExportBinding StarExportBinding; void bail (uint32_t err); -bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*), void (*clearReexports)()); +bool parseCJS (uint16_t* source, uint32_t sourceLen, void (*addExport)(const uint16_t*, const uint16_t*), void (*addReexport)(const uint16_t*, const uint16_t*), void (*addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*clearReexports)()); enum RequireType { Import, diff --git a/lexer.js b/lexer.js index 4836ebf..ed81460 100755 --- a/lexer.js +++ b/lexer.js @@ -11,6 +11,7 @@ let openTokenDepth, starExportMap, lastStarExportSpecifier, _exports, + unsafeGetters, reexports; function resetState () { @@ -27,6 +28,7 @@ function resetState () { lastStarExportSpecifier = null; _exports = new Set(); + unsafeGetters = new Set(); reexports = new Set(); } @@ -47,7 +49,7 @@ function parseCJS (source, name = '@') { e.loc = pos; throw e; } - const result = { exports: [..._exports], reexports: [...reexports] }; + const result = { exports: [..._exports].filter(expt => !unsafeGetters.has(expt)), reexports: [...reexports] }; resetState(); return result; } @@ -260,6 +262,7 @@ function tryParseObjectDefineOrKeys (keys) { pos++; ch = commentWhitespace(); if (ch === 100/*d*/ && source.startsWith('efineProperty', pos + 1)) { + let expt; while (true) { pos += 14; revertPos = pos - 1; @@ -276,7 +279,7 @@ function tryParseObjectDefineOrKeys (keys) { let quot = ch; const exportPos = ++pos; if (!identifier() || source.charCodeAt(pos) !== quot) break; - const expt = source.slice(exportPos, pos); + expt = source.slice(exportPos, pos); pos++; ch = commentWhitespace(); if (ch !== 44/*,*/) break; @@ -304,9 +307,9 @@ function tryParseObjectDefineOrKeys (keys) { pos += 5; ch = commentWhitespace(); if (ch !== 58/*:*/) break; - pos++; 
addExport(expt); - break; + pos = revertPos; + return; } else if (ch === 103/*g*/) { if (!source.startsWith('et', pos + 1)) break; @@ -372,6 +375,9 @@ function tryParseObjectDefineOrKeys (keys) { } break; } + if (expt) { + unsafeGetters.add(expt); + } } else if (keys && ch === 107/*k*/ && source.startsWith('eys', pos + 1)) { while (true) { diff --git a/src/lexer.c b/src/lexer.c index 16deb61..bcfd9a9 100755 --- a/src/lexer.c +++ b/src/lexer.c @@ -41,16 +41,19 @@ const StarExportBinding* STAR_EXPORT_STACK_END = &starExportStack_[MAX_STAR_EXPO void (*addExport)(const uint16_t*, const uint16_t*); void (*addReexport)(const uint16_t*, const uint16_t*); +void (*addUnsafeGetter)(const uint16_t*, const uint16_t*); void (*clearReexports)(); // Note: parsing is based on the _assumption_ that the source is already valid -bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*), void (*_clearReexports)()) { +bool parseCJS (uint16_t* _source, uint32_t _sourceLen, void (*_addExport)(const uint16_t*, const uint16_t*), void (*_addReexport)(const uint16_t*, const uint16_t*), void (*_addUnsafeGetter)(const uint16_t*, const uint16_t*), void (*_clearReexports)()) { source = _source; sourceLen = _sourceLen; if (_addExport) addExport = _addExport; if (_addReexport) addReexport = _addReexport; + if (_addUnsafeGetter) + addUnsafeGetter = _addUnsafeGetter; templateStackDepth = 0; openTokenDepth = 0; @@ -272,6 +275,8 @@ void tryParseObjectDefineOrKeys (bool keys) { pos++; ch = commentWhitespace(); if (ch == 'd' && str_eq13(pos + 1, 'e', 'f', 'i', 'n', 'e', 'P', 'r', 'o', 'p', 'e', 'r', 't', 'y')) { + uint16_t* exportStart = 0; + uint16_t* exportEnd = 0; while (true) { pos += 14; revertPos = pos - 1; @@ -286,9 +291,9 @@ void tryParseObjectDefineOrKeys (bool keys) { ch = commentWhitespace(); if (ch != '\'' && ch != '"') break; uint16_t quot = ch; - uint16_t* exportStart = ++pos; + 
exportStart = ++pos; if (!identifier(*pos) || *pos != quot) break; - uint16_t* exportEnd = pos; + exportEnd = pos; pos++; ch = commentWhitespace(); if (ch != ',') break; @@ -316,9 +321,9 @@ void tryParseObjectDefineOrKeys (bool keys) { pos += 5; ch = commentWhitespace(); if (ch != ':') break; - pos++; addExport(exportStart, exportEnd); - break; + pos = revertPos; + return; } else if (ch == 'g') { if (!str_eq2(pos + 1, 'e', 't')) break; @@ -388,6 +393,9 @@ void tryParseObjectDefineOrKeys (bool keys) { } break; } + if (exportEnd > 0) { + addUnsafeGetter(exportStart, exportEnd); + } } else if (keys && ch == 'k' && str_eq3(pos + 1, 'e', 'y', 's')) { while (true) { diff --git a/src/lexer.js b/src/lexer.js index 0540dad..0b53a7e 100755 --- a/src/lexer.js +++ b/src/lexer.js @@ -18,15 +18,18 @@ export function parse (source, name = '@') { const addr = wasm.sa(len); (isLE ? copyLE : copyBE)(source, new Uint16Array(wasm.memory.buffer, addr, len)); - if (!wasm.parseCJS(addr, source.length, 0, 0)) + if (!wasm.parseCJS(addr, source.length, 0, 0, 0)) throw Object.assign(new Error(`Parse error ${name}${wasm.e()}:${source.slice(0, wasm.e()).split('\n').length}:${wasm.e() - source.lastIndexOf('\n', wasm.e() - 1)}`), { idx: wasm.e() }); - let exports = new Set(), reexports = new Set(); + let exports = new Set(), reexports = new Set(), unsafeGetters = new Set(); + while (wasm.rre()) reexports.add(source.slice(wasm.res(), wasm.ree())); + while (wasm.ru()) + unsafeGetters.add(source.slice(wasm.us(), wasm.ue())); while (wasm.re()) { let exptStr = source.slice(wasm.es(), wasm.ee()); - if (!strictReserved.has(exptStr)) + if (!strictReserved.has(exptStr) && !unsafeGetters.has(exptStr)) exports.add(exptStr); } diff --git a/test/_unit.js b/test/_unit.js index f72a496..71af716 100755 --- a/test/_unit.js +++ b/test/_unit.js @@ -16,6 +16,28 @@ async function loadParser () { suite('Lexer', () => { beforeEach(async () => await loadParser()); + test('Getter opt-outs', () => { + var { exports } = 
parse(` + Object.defineProperty(exports, 'a', { + enumerable: true, + get: function () { + return q.p; + } + }); + + if (false) { + Object.defineProperty(exports, 'a', { + enumerable: false, + get: function () { + return dynamic(); + } + }); + } + `); + + assert.equal(exports.length, 0); + }); + test('TypeScript reexports', () => { var { exports, reexports } = parse(` "use strict";