From eaef6693056518b48e112b1b53f6fa180fc967b1 Mon Sep 17 00:00:00 2001 From: Sam Clegg Date: Sat, 20 Apr 2024 17:42:05 -0700 Subject: [PATCH] Ensure all unused elements are removed during applyDCEGraphRemovals. NFC There were bugs in `applyDCEGraphRemovals` that were preventing it from finding certain unused exports in certain cases. This change cleanup up the pass and adds assertions that all unused imports and exports are actually located and removed, thus preventing these types of bugs from sneaking in again. The test we had for `applyDCEGraphRemovals` were worked, but they were writing in a slightly different style to the current emscripten output. In particular the export name and JS name were matching, even though the actual compiler output produces JS names that contain a leading underscore. I also updates the tests to match this style for consistency. I believe the effects of this bug were not captured by our code size tests because they all run closure compiler afterwards which was removing the unused exports itself. --- .../optimizer/applyDCEGraphRemovals-output.js | 36 ++--- test/optimizer/applyDCEGraphRemovals.js | 46 ++++--- ...al-runtime-applyDCEGraphRemovals-output.js | 12 +- .../minimal-runtime-applyDCEGraphRemovals.js | 24 ++-- tools/acorn-optimizer.mjs | 125 ++++++++++-------- tools/building.py | 37 ++++-- tools/emscripten.py | 3 + tools/extract_metadata.py | 7 - 8 files changed, 164 insertions(+), 126 deletions(-) diff --git a/test/optimizer/applyDCEGraphRemovals-output.js b/test/optimizer/applyDCEGraphRemovals-output.js index 462c8a8b53c91..bc24fa45223a1 100644 --- a/test/optimizer/applyDCEGraphRemovals-output.js +++ b/test/optimizer/applyDCEGraphRemovals-output.js @@ -5,38 +5,30 @@ var wasmImports = { save2: 2 }; -var expD1 = Module["expD1"] = wasmExports["expD1"]; +var _expD1 = Module["_expD1"] = wasmExports["expD1"]; -var expD2 = Module["expD2"] = wasmExports["expD2"]; +var _expD2 = Module["_expD2"] = wasmExports["expD2"]; -var expD3 = Module["expD3"] = wasmExports["expD3"]; +var _expD3 = Module["_expD3"] = wasmExports["expD3"]; -var expD4; +var _expD5 = wasmExports["expD5"]; -var expD5 = wasmExports["expD5"]; +var _expI1 = Module["_expI1"] = () => (expI1 = Module["_expI1"] = wasmExports["expI1"])(); -var expD6; +var _expI2 = Module["_expI2"] = () => (expI2 = Module["_expI2"] = wasmExports["expI2"])(); -var expI1 = Module["expI1"] = () => (expI1 = Module["expI1"] = wasmExports["expI1"])(); +var _expI3 = Module["_expI3"] = () => (expI3 = Module["_expI3"] = wasmExports["expI3"])(); -var expI2 = Module["expI2"] = () => (expI2 = Module["expI2"] = wasmExports["expI2"])(); +var _expI5 = () => (_expI5 = wasmExports["expI5"])(); -var expI3 = Module["expI3"] = () => (expI3 = Module["expI3"] = wasmExports["expI3"])(); +_expD1; -var expI4; +Module["_expD2"]; -var expI5 = () => (expI5 = wasmExports["expI5"])(); +wasmExports["_expD3"]; -var expI6; +_expI1; -expD1; +Module["_expI2"]; -Module["expD2"]; - -wasmExports["expD3"]; - -expI1; - -Module["expI2"]; - -wasmExports["expI3"]; +wasmExports["_expI3"]; diff --git a/test/optimizer/applyDCEGraphRemovals.js b/test/optimizer/applyDCEGraphRemovals.js index 874c57caeb1d0..7dc5caa35f0c5 100644 --- a/test/optimizer/applyDCEGraphRemovals.js +++ b/test/optimizer/applyDCEGraphRemovals.js @@ -1,32 +1,38 @@ var name; -var wasmImports = { save1: 1, number: 33, name: name, func: function() {}, save2: 2 }; +var wasmImports = { + save1: 1, + number: 33, + name: name, + func: function() {}, + save2: 2 +}; // exports gotten directly -var expD1 = Module['expD1'] = wasmExports['expD1']; -var expD2 = Module['expD2'] = wasmExports['expD2']; -var expD3 = Module['expD3'] = wasmExports['expD3']; -var expD4 = Module['expD4'] = wasmExports['expD4']; +var _expD1 = Module['_expD1'] = wasmExports['expD1']; +var _expD2 = Module['_expD2'] = wasmExports['expD2']; +var _expD3 = Module['_expD3'] = wasmExports['expD3']; +var _expD4 = Module['_expD4'] = wasmExports['expD4']; // Like above, but not exported on the Module -var expD5 = wasmExports['expD5']; -var expD6 = wasmExports['expD6']; +var _expD5 = wasmExports['expD5']; +var _expD6 = wasmExports['expD6']; // exports gotten indirectly (async compilation -var expI1 = Module['expI1'] = () => (expI1 = Module['expI1'] = wasmExports['expI1'])(); -var expI2 = Module['expI2'] = () => (expI2 = Module['expI2'] = wasmExports['expI2'])(); -var expI3 = Module['expI3'] = () => (expI3 = Module['expI3'] = wasmExports['expI3'])(); -var expI4 = Module['expI4'] = () => (expI4 = Module['expI4'] = wasmExports['expI4'])(); +var _expI1 = Module['_expI1'] = () => (expI1 = Module['_expI1'] = wasmExports['expI1'])(); +var _expI2 = Module['_expI2'] = () => (expI2 = Module['_expI2'] = wasmExports['expI2'])(); +var _expI3 = Module['_expI3'] = () => (expI3 = Module['_expI3'] = wasmExports['expI3'])(); +var _expI4 = Module['_expI4'] = () => (expI4 = Module['_expI4'] = wasmExports['expI4'])(); // Like above, but not exported on the Module -var expI5 = () => (expI5 = wasmExports['expI5'])(); -var expI6 = () => (expI6 = wasmExports['expI6'])(); +var _expI5 = () => (_expI5 = wasmExports['expI5'])(); +var _expI6 = () => (_expI6 = wasmExports['expI6'])(); // add uses for some of them, leave *4 as non-roots -expD1; -Module['expD2']; -wasmExports['expD3']; +_expD1; +Module['_expD2']; +wasmExports['_expD3']; -expI1; -Module['expI2']; -wasmExports['expI3']; +_expI1; +Module['_expI2']; +wasmExports['_expI3']; -// EXTRA_INFO: { "unused": ["emcc$import$number", "emcc$import$name", "emcc$import$func", "emcc$export$expD4", "emcc$export$expD6", "emcc$export$expI4", "emcc$export$expI6"] } +// EXTRA_INFO: { "unusedImports": ["number", "name", "func"], "unusedExports": ["expD4", "expD6", "expI4", "expI6"] } diff --git a/test/optimizer/minimal-runtime-applyDCEGraphRemovals-output.js b/test/optimizer/minimal-runtime-applyDCEGraphRemovals-output.js index 972559ece52c0..531d6819eab04 100644 --- a/test/optimizer/minimal-runtime-applyDCEGraphRemovals-output.js +++ b/test/optimizer/minimal-runtime-applyDCEGraphRemovals-output.js @@ -7,15 +7,15 @@ var wasmImports = { WebAssembly.instantiate(Module["wasm"], imports).then(output => { wasmExports = output.instance.exports; - expD1 = wasmExports["expD1"]; - expD2 = wasmExports["expD2"]; - expD3 = wasmExports["expD3"]; + _expD1 = wasmExports["expD1"]; + _expD2 = wasmExports["expD2"]; + _expD3 = wasmExports["expD3"]; initRuntime(wasmExports); ready(); }); -expD1; +_expD1; -Module["expD2"]; +Module["_expD2"]; -wasmExports["expD3"]; +wasmExports["_expD3"]; diff --git a/test/optimizer/minimal-runtime-applyDCEGraphRemovals.js b/test/optimizer/minimal-runtime-applyDCEGraphRemovals.js index c7bebd73ac95b..aec43208bd327 100644 --- a/test/optimizer/minimal-runtime-applyDCEGraphRemovals.js +++ b/test/optimizer/minimal-runtime-applyDCEGraphRemovals.js @@ -1,20 +1,26 @@ var name; -var wasmImports = { save1: 1, number: 33, name: name, func: function() {}, save2: 2 }; +var wasmImports = { + save1: 1, + number: 33, + name: name, + func: function() {}, + save2: 2 +}; // exports gotten directly in the minimal runtime style WebAssembly.instantiate(Module["wasm"], imports).then((output) => { wasmExports = output.instance.exports; - expD1 = wasmExports['expD1']; - expD2 = wasmExports['expD2']; - expD3 = wasmExports['expD3']; - expD4 = wasmExports['expD4']; + _expD1 = wasmExports['expD1']; + _expD2 = wasmExports['expD2']; + _expD3 = wasmExports['expD3']; + _expD4 = wasmExports['expD4']; initRuntime(wasmExports); ready(); }); // add uses for some of them, leave *4 as non-roots -expD1; -Module['expD2']; -wasmExports['expD3']; +_expD1; +Module['_expD2']; +wasmExports['_expD3']; -// EXTRA_INFO: { "unused": ["emcc$import$number", "emcc$import$name", "emcc$import$func", "emcc$export$expD4", "emcc$export$expI4"] } +// EXTRA_INFO: { "unusedImports": ["number", "name", "func"], "unusedExports": ["expD4"] } diff --git a/tools/acorn-optimizer.mjs b/tools/acorn-optimizer.mjs index 1bd212e39fd87..5bdab13e7fc82 100755 --- a/tools/acorn-optimizer.mjs +++ b/tools/acorn-optimizer.mjs @@ -89,14 +89,6 @@ function emptyOut(node) { node.type = 'EmptyStatement'; } -// This converts the node into something that terser will ignore in a var -// declaration, that is, it is a way to get rid of initial values. -function convertToNothingInVarInit(node) { - node.type = 'Literal'; - node.value = undefined; - node.raw = 'undefined'; -} - function convertToNullStatement(node) { node.type = 'ExpressionStatement'; node.expression = { @@ -591,18 +583,25 @@ function isDynamicDynCall(node) { // // Matches the wasm export wrappers generated by emcc (see make_export_wrappers -// in emscripten.py). For example: +// in emscripten.py). For example, the right hand side of these assignments: // // var _foo = (a0, a1) => (_foo = wasmExports['foo'])(a0, a1): // +// or +// +// var _foo = (a0, a1) => (_foo = Module['_foo'] = wasmExports['foo'])(a0, a1): +// function isExportWrapperFunction(f) { if (f.body.type != 'CallExpression') return null; let callee = f.body.callee; if (callee.type == 'ParenthesizedExpression') { callee = callee.expression; } - if (callee.type != 'AssignmentExpression' || callee.right.type != 'MemberExpression') return null; + if (callee.type != 'AssignmentExpression') return null; var rhs = callee.right; + if (rhs.type == 'AssignmentExpression') { + rhs = rhs.right; + } if (rhs.type != 'MemberExpression' || !isExportUse(rhs)) return null; return getExportOrModuleUseName(rhs); } @@ -740,7 +739,9 @@ function emitDCEGraph(ast) { emptyOut(node); } else if (value && value.type === 'ArrowFunctionExpression') { // this is - // var x = () => (x = wasmExports['x'])(..) + // () => (x = wasmExports['x'])(..) + // or + // () => (x = Module['_x'] = wasmExports['x'])(..) let asmName = isExportWrapperFunction(value); if (asmName) { saveAsmExport(name, asmName); @@ -978,7 +979,12 @@ function emitDCEGraph(ast) { // way (and we leave further DCE on the JS and wasm sides to their respective // optimizers, closure compiler and binaryen). function applyDCEGraphRemovals(ast) { - const unused = new Set(extraInfo.unused); + const unusedExports = new Set(extraInfo.unusedExports); + const unusedImports = new Set(extraInfo.unusedImports); + const foundUnusedImports = new Set(); + const foundUnusedExports = new Set(); + trace('unusedExports:', unusedExports); + trace('unusedImports:', unusedImports); fullWalk(ast, (node) => { if (isWasmImportsAssign(node)) { @@ -986,59 +992,71 @@ function applyDCEGraphRemovals(ast) { assignedObject.properties = assignedObject.properties.filter((item) => { const name = item.key.name; const value = item.value; - const full = 'emcc$import$' + name; - return !(unused.has(full) && !hasSideEffects(value)); - }); - } else if (node.type === 'AssignmentExpression') { - // when we assign to a thing we don't need, we can just remove the assign - // var x = Module['x'] = wasmExports['x']; - const target = node.left; - if (isExportUse(target) || isModuleUse(target)) { - const name = getExportOrModuleUseName(target); - const full = 'emcc$export$' + name; - const value = node.right; - if (unused.has(full) && (isExportUse(value) || !hasSideEffects(value))) { - // This will be in a var init, and we just remove that value. - convertToNothingInVarInit(node); + if (unusedImports.has(name)) { + foundUnusedImports.add(name); + return hasSideEffects(value); } - } + return true; + }); } else if (node.type === 'VariableDeclaration') { - // Handle the case we declare a variable but don't assign to the module: - // var x = wasmExports['x']; - // and - // var x = () => (x = wasmExports['x']).apply(...); + // Handle the various ways in which we extract wasmExports: + // 1. var _x = wasmExports['x']; + // or + // 2. var _x = Module['_x'] = wasmExports['x']; + // + // Or for delayed instantiation: + // 3. var _x = () => (_x = wasmExports['x'])(...); + // or + // 4. var _x = Module['_x] = () => (_x = Module['_x'] = wasmExports['x'])(...); const init = node.declarations[0].init; - if (init) { - if (isExportUse(init)) { - const name = getExportOrModuleUseName(init); - const full = 'emcc$export$' + name; - if (unused.has(full)) { - convertToNothingInVarInit(init); - } - } else if (init.type == 'ArrowFunctionExpression') { - const name = isExportWrapperFunction(init); - const full = 'emcc$export$' + name; - if (unused.has(full)) { - convertToNothingInVarInit(init); - } + if (!init) { + return; + } + + // Look through the optional `Module['_x']` + let realInit = init; + if (init.type == 'AssignmentExpression' && isModuleUse(init.left)) { + realInit = init.right; + } + + if (isExportUse(realInit)) { + const export_name = getExportOrModuleUseName(realInit); + if (unusedExports.has(export_name)) { + // Case (1) and (2) + trace('found unused export:', export_name); + emptyOut(node); + foundUnusedExports.add(export_name); + } + } else if (realInit.type == 'ArrowFunctionExpression') { + const export_name = isExportWrapperFunction(realInit); + if (unusedExports.has(export_name)) { + // Case (3) and (4) + trace('found unused export:', export_name); + emptyOut(node); + foundUnusedExports.add(export_name); } } } else if (node.type === 'ExpressionStatement') { const expr = node.expression; // In the MINIMAL_RUNTIME code pattern we have just - // x = wasmExports['x'] + // _x = wasmExports['x'] // and never in a var. if (expr.operator === '=' && expr.left.type === 'Identifier' && isExportUse(expr.right)) { - const name = expr.left.name; - if (name === getExportOrModuleUseName(expr.right)) { - const full = 'emcc$export$' + name; - if (unused.has(full)) { - emptyOut(node); - } + const export_name = getExportOrModuleUseName(expr.right); + if (unusedExports.has(export_name)) { + emptyOut(node); + foundUnusedExports.add(export_name); } } } }); + + for (const i of unusedImports) { + assert(foundUnusedImports.has(i), 'unused import not found: ' + i); + } + for (const e of unusedExports) { + assert(foundUnusedExports.has(e), 'unused export not found: ' + e); + } } // Need a parser to pass to acorn.Node constructor. @@ -2050,7 +2068,10 @@ const registry = { minifyGlobals: minifyGlobals, }; -passes.forEach((pass) => registry[pass](ast)); +passes.forEach((pass) => { + trace(`running AST pass: ${pass}`); + registry[pass](ast); +}); if (!noPrint) { const terserAst = terser.AST_Node.from_mozilla_ast(ast); diff --git a/tools/building.py b/tools/building.py index 01dfd7e49c514..a6c8f7a1964cb 100644 --- a/tools/building.py +++ b/tools/building.py @@ -733,6 +733,15 @@ def minify_wasm_js(js_file, wasm_file, expensive_optimizations, debug_info): return js_file +def is_internal_global(name): + internal_start_stop_symbols = set(['__start_em_asm', '__stop_em_asm', + '__start_em_js', '__stop_em_js', + '__start_em_lib_deps', '__stop_em_lib_deps', + '__em_lib_deps']) + internal_prefixes = ('__em_js__', '__em_lib_deps') + return name in internal_start_stop_symbols or any(name.startswith(p) for p in internal_prefixes) + + # run binaryen's wasm-metadce to dce both js and wasm def metadce(js_file, wasm_file, debug_info): logger.debug('running meta-DCE') @@ -786,14 +795,17 @@ def metadce(js_file, wasm_file, debug_info): if 'import' in item and item['import'][1] in WASI_IMPORTS: item['import'][0] = settings.WASI_MODULE_NAME - # map import names from wasm to JS, using the actual name the wasm uses for the import + # map import/export names to native wasm symbols. import_name_map = {} + export_name_map = {} for item in graph: if 'import' in item: name = item['import'][1] - import_name_map[item['name']] = 'emcc$import$' + name + import_name_map[item['name']] = name if asmjs_mangle(name) in settings.SIDE_MODULE_IMPORTS: item['root'] = True + elif 'export' in item: + export_name_map[item['name']] = item['export'] temp = temp_files.get('.json', prefix='emcc_dce_graph_').name utils.write_file(temp, json.dumps(graph, indent=2)) # run wasm-metadce @@ -804,7 +816,8 @@ def metadce(js_file, wasm_file, debug_info): debug=debug_info, stdout=PIPE) # find the unused things in js - unused = [] + unused_imports = [] + unused_exports = [] PREFIX = 'unused: ' for line in out.splitlines(): if line.startswith(PREFIX): @@ -814,21 +827,25 @@ def metadce(js_file, wasm_file, debug_info): if settings.MAIN_MODULE and name.split('$')[-1] in ('wasmMemory', 'wasmTable'): continue # we only remove imports and exports in applyDCEGraphRemovals - if name in import_name_map: - name = import_name_map[name] - unused.append(name) + if name.startswith('emcc$import$'): + native_name = import_name_map[name] + unused_imports.append(native_name) elif name.startswith('emcc$export$'): - unused.append(name) - if not unused: + if settings.DECLARE_ASM_MODULE_EXPORTS: + native_name = export_name_map[name] + if not is_internal_global(native_name): + unused_exports.append(native_name) + if not unused_exports and not unused_imports: # nothing found to be unused, so we have nothing to remove return js_file # remove them passes = ['applyDCEGraphRemovals'] if settings.MINIFY_WHITESPACE: passes.append('--minify-whitespace') - extra_info = {'unused': unused} if DEBUG: - logger.debug("unused: %s", str(unused)) + logger.debug("unused_imports: %s", str(unused_imports)) + logger.debug("unused_exports: %s", str(unused_exports)) + extra_info = {'unusedImports': unused_imports, 'unusedExports': unused_exports} return acorn_optimizer(js_file, passes, extra_info=json.dumps(extra_info)) diff --git a/tools/emscripten.py b/tools/emscripten.py index 75b5a70ae9cbd..6734cf2fff107 100644 --- a/tools/emscripten.py +++ b/tools/emscripten.py @@ -295,6 +295,9 @@ def trim_asm_const_body(body): def create_global_exports(global_exports): lines = [] for k, v in global_exports.items(): + if building.is_internal_global(k): + continue + v = int(v) if settings.RELOCATABLE: v += settings.GLOBAL_BASE diff --git a/tools/extract_metadata.py b/tools/extract_metadata.py index 9ae422839d2da..c13263574a0a7 100644 --- a/tools/extract_metadata.py +++ b/tools/extract_metadata.py @@ -241,15 +241,8 @@ def get_main_reads_params(module, export_map): def get_global_exports(module, exports): global_exports = {} - internal_start_stop_symbols = set(['__start_em_asm', '__stop_em_asm', - '__start_em_js', '__stop_em_js', - '__start_em_lib_deps', '__stop_em_lib_deps', - '__em_lib_deps']) - internal_prefixes = ('__em_js__', '__em_lib_deps') for export in exports: if export.kind == webassembly.ExternType.GLOBAL: - if export.name in internal_start_stop_symbols or any(export.name.startswith(p) for p in internal_prefixes): - continue g = module.get_global(export.index) global_exports[export.name] = str(get_global_value(g)) return global_exports