Skip to content

Meta-DCE for JS+WASM #5919

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Dec 15, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
dd0cf7c
refactor js minification into a method
kripken Dec 6, 2017
e84a347
wip
kripken Dec 7, 2017
a62ab80
wip
kripken Dec 7, 2017
ae015fd
wip
kripken Dec 7, 2017
cf13abd
test
kripken Dec 7, 2017
01ab366
progress
kripken Dec 7, 2017
166d8a4
fix
kripken Dec 7, 2017
ac1fda7
start to apply metadce to js
kripken Dec 7, 2017
456ddf8
progress
kripken Dec 7, 2017
283e789
does work, but output is broken
kripken Dec 7, 2017
07229e0
Merge remote-tracking branch 'origin/incoming' into metadce
kripken Dec 7, 2017
2504349
nicer logging and export handling fixes [skip ci]y
kripken Dec 7, 2017
58cf6c1
use debug info in metadce
kripken Dec 8, 2017
65d2092
response file handling
kripken Dec 8, 2017
8f8e8a8
emterpreter closure fix
kripken Dec 8, 2017
0056115
maintain an explicit list of the exports the user requested
kripken Dec 8, 2017
59600df
do more js dce after meta-dce
kripken Dec 8, 2017
1980aaf
fix wasm backend import handling
kripken Dec 8, 2017
250b417
add a test for metadce
kripken Dec 8, 2017
4f07c7a
update binaryen port to version_40 (which includes wasm-metadce)
kripken Dec 8, 2017
0e53df3
cleanup [skip ci] [ci skip]
kripken Dec 8, 2017
d4059bd
don't metadce when there is a symbol map, we don't have support for t…
kripken Dec 8, 2017
69fd7aa
everything worked til now, but not well enough. found a place we let …
kripken Dec 8, 2017
0c899ce
improve test
kripken Dec 8, 2017
3ca978b
make other.test_binaryen_metadce more robust, don't depend on concret…
kripken Dec 8, 2017
b7fe2ec
run metadce in -O3
kripken Dec 9, 2017
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 21 additions & 17 deletions emcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,10 @@ def save_intermediate(name=None, suffix='js'):
return
shutil.copyfile(final, name)
Intermediate.counter += 1

def save_intermediate_with_wasm(name, wasm_binary):
    """Save the current JS as an intermediate file, plus a copy of the wasm.

    name: label for the intermediate; passed to save_intermediate() for the
        JS and embedded in the wasm copy's filename.
    wasm_binary: path of the wasm file to copy next to the JS intermediate.
    """
    # save the js
    save_intermediate(name)
    temp_dir = shared.get_emscripten_temp_dir()
    # save_intermediate() bumps Intermediate.counter after writing, so
    # counter - 1 is presumably the index it just used for the JS file,
    # which keeps the two files numbered alike -- verify against
    # save_intermediate's early-return path.
    wasm_copy = os.path.join(temp_dir, 'emcc-%d-%s.wasm' % (Intermediate.counter - 1, name))
    shutil.copyfile(wasm_binary, wasm_copy)

class TimeLogger(object):
last = time.time()
Expand Down Expand Up @@ -732,7 +735,7 @@ def setting_sub(s):

if not arg.startswith('-'):
if not os.path.exists(arg):
exit_with_error('%s: No such file or directory ("%s" was expected to be an input file, based on the commandline arguments provided)', arg, arg)
exit_with_error('%s: No such file or directory ("%s" was expected to be an input file, based on the commandline arguments provided)' % (arg, arg))

arg_ending = filename_type_ending(arg)
if arg_ending.endswith(SOURCE_ENDINGS + BITCODE_ENDINGS + DYNAMICLIB_ENDINGS + ASSEMBLY_ENDINGS + HEADER_ENDINGS) or shared.Building.is_ar(arg): # we already removed -o <target>, so all these should be inputs
Expand Down Expand Up @@ -901,6 +904,9 @@ def check(input_file):
# used for warnings in emscripten.py
shared.Settings.ORIGINAL_EXPORTED_FUNCTIONS = original_exported_response or shared.Settings.EXPORTED_FUNCTIONS[:]

# Note the exports the user requested
shared.Building.user_requested_exports = shared.Settings.EXPORTED_FUNCTIONS[:]

# -s ASSERTIONS=1 implies the heaviest stack overflow check mode. Set the implication here explicitly to avoid having to
# do preprocessor "#if defined(ASSERTIONS) || defined(STACK_OVERFLOW_CHECK)" in .js files, which is not supported.
if shared.Settings.ASSERTIONS:
Expand Down Expand Up @@ -2282,6 +2288,7 @@ def do_binaryen(target, asm_target, options, memfile, wasm_binary_target,
# normally we emit binary, but for debug info, we might emit text first
wrote_wasm_text = False
debug_info = options.debug_level >= 2 or options.profiling_funcs
emit_symbol_map = options.emit_symbol_map or shared.Settings.CYBERDWARF
# finish compiling to WebAssembly, using asm2wasm, if we didn't already emit WebAssembly directly using the wasm backend.
if not shared.Settings.WASM_BACKEND:
if DEBUG:
Expand Down Expand Up @@ -2320,7 +2327,7 @@ def do_binaryen(target, asm_target, options, memfile, wasm_binary_target,
cmd += ['--enable-threads']
if debug_info:
cmd += ['-g']
if options.emit_symbol_map or shared.Settings.CYBERDWARF:
if emit_symbol_map:
cmd += ['--symbolmap=' + target + '.symbols']
# we prefer to emit a binary, as it is more efficient. however, when we
# want full debug info support (not just function names), then we must
Expand Down Expand Up @@ -2395,20 +2402,17 @@ def do_binaryen(target, asm_target, options, memfile, wasm_binary_target,
# minify the JS
optimizer.do_minify() # calculate how to minify
if optimizer.cleanup_shell or options.use_closure_compiler:
if DEBUG: save_intermediate('preclean', 'js')
# in -Os and -Oz, run AJSDCE (aggressive JS DCE, performs multiple iterations)
passes = ['noPrintMetadata', 'JSDCE' if options.shrink_level == 0 else 'AJSDCE', 'last']
if optimizer.minify_whitespace:
passes.append('minifyWhitespace')
misc_temp_files.note(final)
logging.debug('running cleanup on shell code: ' + ' '.join(passes))
final = shared.Building.js_optimizer_no_asmjs(final, passes)
if DEBUG: save_intermediate('postclean', 'js')
if options.use_closure_compiler:
logging.debug('running closure on shell code')
misc_temp_files.note(final)
final = shared.Building.closure_compiler(final, pretty=not optimizer.minify_whitespace)
if DEBUG: save_intermediate('postclosure', 'js')
if DEBUG:
save_intermediate_with_wasm('preclean', wasm_binary_target)
final = shared.Building.minify_wasm_js(js_file=final,
wasm_file=wasm_binary_target,
expensive_optimizations=options.opt_level >= 3 or options.shrink_level > 0,
minify_whitespace=optimizer.minify_whitespace,
use_closure_compiler=options.use_closure_compiler,
debug_info=debug_info,
emit_symbol_map=emit_symbol_map)
if DEBUG:
save_intermediate_with_wasm('postclean', wasm_binary_target)
# replace placeholder strings with correct subresource locations
if shared.Settings.SINGLE_FILE:
f = open(final, 'r')
Expand Down
25 changes: 17 additions & 8 deletions emscripten.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ def parse_backend_output(backend_output, DEBUG):
logging.error('emscript: failure to parse metadata output from compiler backend. raw output is: \n' + metadata_raw)
raise e

#if DEBUG: print >> sys.stderr, "FUNCS", funcs
#if DEBUG: print >> sys.stderr, "META", metadata
#if DEBUG: print >> sys.stderr, "meminit", mem_init
# functions marked llvm.used in the code are exports requested by the user
shared.Building.user_requested_exports += metadata['exports']

return funcs, metadata, mem_init


Expand Down Expand Up @@ -597,11 +597,18 @@ def get_all_implemented(forwarded_json, metadata):
return metadata['implementedFunctions'] + list(forwarded_json['Functions']['implementedFunctions'].keys()) # XXX perf?


def get_original_exported_functions(settings):
    """Return the list of original exports, for error reporting.

    settings['ORIGINAL_EXPORTED_FUNCTIONS'] is either the list of exports
    itself, or a response-file reference of the form '@<path>', in which
    case the JSON list stored in that file is loaded and returned.

    Raises KeyError if the setting is missing, and whatever open() or the
    JSON parser raise if the response file is unreadable or malformed.
    """
    ret = settings['ORIGINAL_EXPORTED_FUNCTIONS']
    # An empty value cannot be a response-file reference; guard so that
    # indexing it does not raise IndexError.
    if ret and ret[0] == '@':
        # Close the response file deterministically instead of leaking
        # the handle.
        with open(ret[1:]) as response_file:
            ret = json.load(response_file)
    return ret


def check_all_implemented(all_implemented, pre, settings):
if settings['ASSERTIONS'] and settings.get('ORIGINAL_EXPORTED_FUNCTIONS'):
original_exports = settings['ORIGINAL_EXPORTED_FUNCTIONS']
if original_exports[0] == '@':
original_exports = json.loads(open(original_exports[1:]).read())
original_exports = get_original_exported_functions(settings)
for requested in original_exports:
if not is_already_implemented(requested, pre, all_implemented):
# could be a js library func
Expand Down Expand Up @@ -1862,8 +1869,7 @@ def create_exported_implemented_functions_wasm(pre, forwarded_json, metadata, se
exported_implemented_functions.add(key)

if settings['ASSERTIONS'] and settings.get('ORIGINAL_EXPORTED_FUNCTIONS'):
original_exports = settings['ORIGINAL_EXPORTED_FUNCTIONS']
if original_exports[0] == '@': original_exports = json.loads(open(original_exports[1:]).read())
original_exports = get_original_exported_functions(settings)
for requested in original_exports:
# check if already implemented
# special-case malloc, EXPORTED by default for internal use, but we bake in a trivial allocator and warn at runtime if used in ASSERTIONS \
Expand Down Expand Up @@ -2066,6 +2072,9 @@ def load_metadata(metadata_raw):
# Initializers call the global var version of the export, so they get the mangled name.
metadata['initializers'] = list(map(asmjs_mangle, metadata['initializers']))

# functions marked llvm.used in the code are exports requested by the user
shared.Building.user_requested_exports += metadata['exports']

return metadata


Expand Down
2 changes: 1 addition & 1 deletion src/postamble.js
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ Module['callMain'] = function callMain(args) {
argv = allocate(argv, 'i32', ALLOC_NORMAL);

#if EMTERPRETIFY_ASYNC
var initialEmtStackTop = Module['asm'].emtStackSave();
var initialEmtStackTop = Module['asm']['emtStackSave']();
#endif

try {
Expand Down
8 changes: 4 additions & 4 deletions tests/optimizer/JSDCE-uglifyjsNodeTypes-output.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
var defun = (function() {
});
})();
var name = (function() {
});
})();
var object = (function() {
});
})();
var non_reserved = (function() {
});
})();
function func_1() {
}
function func_2() {
Expand Down
8 changes: 4 additions & 4 deletions tests/optimizer/JSDCE-uglifyjsNodeTypes.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
var defun = function () { var a = 1; };
var name = function () { var a = 1; };
var object = function () { var a = 1; };
var non_reserved = function () { var a = 1; };
var defun = (function () { var a = 1; })();
var name = (function () { var a = 1; })();
var object = (function () { var a = 1; })();
var non_reserved = (function () { var a = 1; })();

function func_1() { var a = 1; }
function func_2() { var a = 1; }
Expand Down
26 changes: 26 additions & 0 deletions tests/optimizer/applyDCEGraphRemovals-output.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
var name;
Module.asmLibraryArg = {
"save1": 1,
"save2": 2
};
var expD1 = Module["expD1"] = asm["expD1"];
var expD2 = Module["expD2"] = asm["expD2"];
var expD3 = Module["expD3"] = asm["expD3"];
var expD4 = undefined;
var expI1 = Module["expI1"] = (function() {
return Module["asm"]["expI1"].apply(null, arguments);
});
var expI2 = Module["expI2"] = (function() {
return Module["asm"]["expI2"].apply(null, arguments);
});
var expI3 = Module["expI3"] = (function() {
return Module["asm"]["expI3"].apply(null, arguments);
});
var expI4 = undefined;
expD1;
Module["expD2"];
asm["expD3"];
expI1;
Module["expI2"];
asm["expI3"];

33 changes: 33 additions & 0 deletions tests/optimizer/applyDCEGraphRemovals.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Input fixture for the 'applyDCEGraphRemovals' js-optimizer pass.
// The trailing EXTRA_INFO line lists the graph nodes to treat as unused;
// the matching -output.js file expects those imports to be dropped from
// asmLibraryArg and those exports to be replaced with `undefined`.
// NOTE(review): the EXTRA_INFO line is data read by the pass rather than
// documentation -- presumably it must stay byte-for-byte as is.
var name;
Module.asmLibraryArg = { 'save1': 1, 'number': 33, 'name': name, 'func': function() {}, 'save2': 2 };

// exports gotten directly
var expD1 = Module['expD1'] = asm['expD1'];
var expD2 = Module['expD2'] = asm['expD2'];
var expD3 = Module['expD3'] = asm['expD3'];
var expD4 = Module['expD4'] = asm['expD4'];

// exports gotten indirectly (async compilation)
var expI1 = Module['expI1'] = (function() {
return Module['asm']['expI1'].apply(null, arguments);
});
var expI2 = Module['expI2'] = (function() {
return Module['asm']['expI2'].apply(null, arguments);
});
var expI3 = Module['expI3'] = (function() {
return Module['asm']['expI3'].apply(null, arguments);
});
var expI4 = Module['expI4'] = (function() {
return Module['asm']['expI4'].apply(null, arguments);
});

// add uses for some of them, leave *4 as non-roots
// (one use each by bare name, via Module[...], and via asm[...])
expD1;
Module['expD2'];
asm['expD3'];

expI1;
Module['expI2'];
asm['expI3'];

// EXTRA_INFO: { "unused": ["emcc$import$number", "emcc$import$name", "emcc$import$func", "emcc$export$expD4", "emcc$export$expI4"] }
60 changes: 60 additions & 0 deletions tests/optimizer/emitDCEGraph-output.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
[
{
"name": "emcc$import$number",
"import": [
"env",
"number"
]
},
{
"name": "emcc$import$temp",
"import": [
"env",
"temp"
]
},
{
"name": "emcc$export$expD1",
"export": "expD1",
"root": true
},
{
"name": "emcc$export$expD2",
"export": "expD2",
"root": true
},
{
"name": "emcc$export$expD3",
"export": "expD3",
"root": true
},
{
"name": "emcc$export$expD4",
"export": "expD4"
},
{
"name": "emcc$export$expI1",
"export": "expI1",
"root": true
},
{
"name": "emcc$export$expI2",
"export": "expI2",
"root": true
},
{
"name": "emcc$export$expI3",
"export": "expI3",
"root": true
},
{
"name": "emcc$export$expI4",
"export": "expI4"
},
{
"name": "emcc$export$expD1NM",
"export": "expD1NM",
"root": true
}
]

36 changes: 36 additions & 0 deletions tests/optimizer/emitDCEGraph.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
// Input fixture for the 'emitDCEGraph' js-optimizer pass.
// The matching -output.js file expects one graph node per import in
// asmLibraryArg and one per export, with "root": true on every export
// that is used below (and on the export that has no Module[...] alias).
var temp;
Module.asmLibraryArg = { 'number': 33, 'temp': temp };

// exports gotten directly
var expD1 = Module['expD1'] = asm['expD1'];
var expD2 = Module['expD2'] = asm['expD2'];
var expD3 = Module['expD3'] = asm['expD3'];
var expD4 = Module['expD4'] = asm['expD4'];

// exports gotten indirectly (async compilation)
var expI1 = Module['expI1'] = (function() {
return Module['asm']['expI1'].apply(null, arguments);
});
var expI2 = Module['expI2'] = (function() {
return Module['asm']['expI2'].apply(null, arguments);
});
var expI3 = Module['expI3'] = (function() {
return Module['asm']['expI3'].apply(null, arguments);
});
var expI4 = Module['expI4'] = (function() {
return Module['asm']['expI4'].apply(null, arguments);
});

// add uses for some of them, leave *4 as non-roots
// (one use each by bare name, via Module[...], and via asm[...])
expD1;
Module['expD2'];
asm['expD3'];

expI1;
Module['expI2'];
asm['expI3'];

// without a Module use, not ok to remove, as this looks weird
// and we don't know what's going on
// (the expected output keeps expD1NM as a root for this reason)
var expD1NM = asm['expD1NM'];

33 changes: 33 additions & 0 deletions tests/test_other.py
Original file line number Diff line number Diff line change
Expand Up @@ -1976,6 +1976,10 @@ def test_js_optimizer(self):
['JSDCE']),
(path_from_root('tests', 'optimizer', 'AJSDCE.js'), open(path_from_root('tests', 'optimizer', 'AJSDCE-output.js')).read(),
['AJSDCE']),
(path_from_root('tests', 'optimizer', 'emitDCEGraph.js'), open(path_from_root('tests', 'optimizer', 'emitDCEGraph-output.js')).read(),
['emitDCEGraph', 'noEmitAst']),
(path_from_root('tests', 'optimizer', 'applyDCEGraphRemovals.js'), open(path_from_root('tests', 'optimizer', 'applyDCEGraphRemovals-output.js')).read(),
['applyDCEGraphRemovals']),
]:
print(input, passes)

Expand Down Expand Up @@ -7663,6 +7667,35 @@ def break_cashew():
assert proc.returncode != 0, err
assert 'hello, world!' not in out, out

def test_binaryen_metadce(self):
    """Check which imports JS sends to wasm at each optimization level.

    Builds tests/hello_world.cpp with WASM=1 and -g2 under several sets of
    flags, extracts the keys of the `Module.asmLibraryArg = {...}` object
    from the emitted a.out.js, and checks their count as well as the
    presence or absence of specific names.
    """
    # in -O3, -Os and -Oz we metadce, which removes imports wasm doesn't need.
    # each entry: extra emcc args, expected number of imports sent, names
    # that must be sent, names that must not be sent
    for args, expected_len, expected_exists, expected_not_exists in [
        ([], 24, ['abort', 'tempDoublePtr'], ['waka']),
        (['-O1'], 21, ['abort', 'tempDoublePtr'], ['waka']),
        (['-O2'], 21, ['abort', 'tempDoublePtr'], ['waka']),
        (['-O3'], 16, ['abort'], ['tempDoublePtr', 'waka']),  # in -O3, -Os and -Oz we metadce
        (['-Os'], 16, ['abort'], ['tempDoublePtr', 'waka']),
        (['-Oz'], 16, ['abort'], ['tempDoublePtr', 'waka']),
        # finally, check what happens when we export pretty much nothing. wasm should be almost empty
        (['-Os', '-s', 'EXPORTED_FUNCTIONS=[]', '-s', 'EXPORTED_RUNTIME_METHODS=[]'], 9, ['abort'], ['tempDoublePtr', 'waka']),
    ]:
        print(args, expected_len, expected_exists, expected_not_exists)
        subprocess.check_call([PYTHON, EMCC, path_from_root('tests', 'hello_world.cpp')] + args + ['-s', 'WASM=1', '-g2'])
        # find the imports we send from JS
        with open('a.out.js') as f:
            js = f.read()
        start = js.find('Module.asmLibraryArg = ')
        # fail with a clear message rather than slicing from a bogus offset
        assert start >= 0, 'Module.asmLibraryArg not found in a.out.js'
        end = js.find('}', start) + 1
        start = js.find('{', start)
        # drop the braces and the character adjacent to each of them
        relevant = js[start+2:end-2]
        relevant = relevant.replace(' ', '').replace('"', '').replace("'", '').split(',')
        # each entry looks like key:value; keep only the key
        sent = [x.split(':')[0].strip() for x in relevant]
        assert len(sent) == expected_len, (len(sent), expected_len)
        for exists in expected_exists:
            assert exists in sent, [exists, sent]
        for not_exists in expected_not_exists:
            assert not_exists not in sent, [not_exists, sent]

# test disabling of JS FFI legalization
def test_legalize_js_ffi(self):
with clean_write_access_to_canonical_temp_dir():
Expand Down
2 changes: 1 addition & 1 deletion tools/emterpretify.py
Original file line number Diff line number Diff line change
Expand Up @@ -787,7 +787,7 @@ def process(code):

# process functions, generating bytecode
with temp_files.get_file('.js') as temp:
shared.Building.js_optimizer(infile, ['emterpretify'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'externalEmterpretedFuncs': list(external_emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES, 'ASYNC': ASYNC, 'PROFILING': PROFILING, 'ASSERTIONS': ASSERTIONS }, output_filename=temp, just_concat=True)
shared.Building.js_optimizer(infile, ['emterpretify', 'noEmitAst'], extra_info={ 'emterpretedFuncs': list(emterpreted_funcs), 'externalEmterpretedFuncs': list(external_emterpreted_funcs), 'opcodes': OPCODES, 'ropcodes': ROPCODES, 'ASYNC': ASYNC, 'PROFILING': PROFILING, 'ASSERTIONS': ASSERTIONS }, output_filename=temp, just_concat=True)
# load the module and modify it
asm = asm_module.AsmModule(temp)

Expand Down
Loading