diff --git a/src/passes/CMakeLists.txt b/src/passes/CMakeLists.txt
index 4cb696c2199..90ee376f218 100644
--- a/src/passes/CMakeLists.txt
+++ b/src/passes/CMakeLists.txt
@@ -48,6 +48,7 @@ set(passes_SOURCES
   Heap2Local.cpp
   I64ToI32Lowering.cpp
   Inlining.cpp
+  InstrumentCooperativeGC.cpp
   InstrumentLocals.cpp
   InstrumentMemory.cpp
   Intrinsics.cpp
diff --git a/src/passes/I64ToI32Lowering.cpp b/src/passes/I64ToI32Lowering.cpp
index 0f591871b3c..82c2fc5eb0c 100644
--- a/src/passes/I64ToI32Lowering.cpp
+++ b/src/passes/I64ToI32Lowering.cpp
@@ -379,7 +379,8 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> {
     if (curr->type != Type::i64) {
       return;
     }
-    assert(!curr->isAtomic && "64-bit atomic load not implemented");
+    //assert(!curr->isAtomic && "64-bit atomic load not implemented");
+//    if (curr->isAtomic) printf("64-bit atomic load not implemented\n");
     TempVar lowBits = getTemp();
     TempVar highBits = getTemp();
     TempVar ptrTemp = getTemp();
@@ -423,7 +424,8 @@ struct I64ToI32Lowering : public WalkerPass<PostWalker<I64ToI32Lowering>> {
       return;
     }
     assert(curr->offset + 4 > curr->offset);
-    assert(!curr->isAtomic && "atomic store not implemented");
+//    assert(!curr->isAtomic && "atomic store not implemented");
+//    if (curr->isAtomic) printf("64-bit atomic store not implemented\n");
     TempVar highBits = fetchOutParam(curr->value);
     uint8_t bytes = curr->bytes;
     curr->bytes = std::min(curr->bytes, uint8_t(4));
diff --git a/src/passes/InstrumentCooperativeGC.cpp b/src/passes/InstrumentCooperativeGC.cpp
new file mode 100644
index 00000000000..2b0ed66ea68
--- /dev/null
+++ b/src/passes/InstrumentCooperativeGC.cpp
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2017 WebAssembly Community Group participants
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//
+// Instruments the build with cooperative GC checkpoints: a call to the
+// function gc_participate_to_garbage_collection() is injected at the top
+// of every loop body, so that long-running loops periodically yield to
+// participate in multithreaded garbage collection.
+//
+// Functions that implement the GC runtime itself (and other low-level
+// runtime primitives) are blacklisted and left uninstrumented, since
+// instrumenting them would be incorrect (or needlessly slow).
+//
+// A per-function and whole-module summary of the number of injected
+// checkpoints is printed out for debugging and performance tuning.
+//
+
+#include "asmjs/shared-constants.h"
+#include "shared-constants.h"
+#include <pass.h>
+#include <wasm-builder.h>
+#include <wasm.h>
+
+namespace wasm {
+
+Name GC_FUNC("gc_participate_to_garbage_collection");
+
+struct InstrumentCooperativeGC : public WalkerPass<PostWalker<InstrumentCooperativeGC>> {
+  // Adds calls to internal function.
+  bool addsEffects() override { return true; }
+
+  static bool isBlacklistedFunctionName(const Name &n)
+  {
+    const char *blacklisted[] = {
+      "wait_for_all_participants",
+      "start_multithreaded_collection",
+      "start_multithreaded_marking",
+      "wait_for_all_threads_finished_marking",
+      "hash_ptr",
+      "find_insert_index",
+      "find_index",
+      "table_insert",
+      "realloc_table",
+      "free_at_index",
+      "mark",
+      "sweep",
+      "collect_when_stack_is_empty",
+      "hash_finalizer",
+      "find_finalizer_index",
+      "find_and_run_a_finalizer",
+      "insert_finalizer",
+      "wait_for_all_participants",
+      "start_multithreaded_collection",
+      "start_multithreaded_marking",
+      "wait_for_all_threads_finished_marking",
+      "mark_current_thread_stack",
+      "mark_from_queue",
+      "finish_multithreaded_marking",
+      "hash_root",
+      "insert_root",
+      "attempt_allocate",
+      "realloc_table",
+      "claim_more_memory",
+      "main",
+      "exit_fenced_access",
+      "sbrk",
+      "dlcalloc",
+      "__wasm_init_memory",
+      "__wasm_call_ctors",
+      "strlen",
+      "strcpy",
+      "stackSave",
+      "stackRestore",
+      "stackAlloc",
+      "emscripten_wasm_worker_initialize",
+      "dlfree",
+      "internal_memalign",
+      "prepend_alloc",
+      "dispose_chunk",
+      "fflush",
+      "BITVEC_CAS_SET",
+    };
+    for(size_t i = 0; i < sizeof(blacklisted)/sizeof(blacklisted[0]); ++i)
+      if (n == blacklisted[i]) return true;
+    return false;
+  }
+
+  static bool functionIsBlacklisted(Function *curr) {
+    return (curr->imported() || (curr->name.startsWith("gc_") && curr->name != "gc_sleep") || curr->name.startsWith("__") ||
+      curr->name.startsWith("emmalloc") || curr->name.startsWith("dlmalloc") ||
+      curr->name.startsWith("emscripten_stack") ||
+      curr->name.startsWith("SAFE_HEAP") ||
+      isBlacklistedFunctionName(curr->name));
+  }
+
+  int numCheckpointsAddedInFunction = 0, numCheckpointsAddedTotal = 0, numFunctionsAnnotated = 0, numFunctionsNothingToAdd = 0, numFunctionsSkipped = 0;
+
+  void walkFunction(Function* func) {
+    if (!functionIsBlacklisted(func))
+    {
+      ++numFunctionsAnnotated;
+      setFunction(func);
+      visitFunction(func);
+      doWalkFunction(func);
+      setFunction(nullptr);
+      if (numCheckpointsAddedInFunction)
+        printf("InstrumentCooperativeGC: injected %d GC check points to function \"%s\".\n", numCheckpointsAddedInFunction, func->name.str.data());
+      else
+      {
+        //printf("InstrumentCooperativeGC: \"%s\": no GC points to add.\n", func->name.str.data());
+        ++numFunctionsNothingToAdd;
+      }
+      numCheckpointsAddedTotal += numCheckpointsAddedInFunction;
+      numCheckpointsAddedInFunction = 0;
+    }
+    else
+      ++numFunctionsSkipped;
+  }
+
+  void visitModule(Module* curr) {
+    printf("InstrumentCooperativeGC summary: Injected a total of %d GC checkpoints to %d/%d (%.2f%%) functions. (%d had nothing to add, %d were blacklisted). Avg GC checkpoints: %.3f per added function, %.3f per all functions in program.\n",
+      numCheckpointsAddedTotal, numFunctionsAnnotated-numFunctionsNothingToAdd, numFunctionsAnnotated+numFunctionsSkipped,
+      (numFunctionsAnnotated-numFunctionsNothingToAdd)*100.0/(numFunctionsAnnotated+numFunctionsSkipped),
+      numFunctionsNothingToAdd, numFunctionsSkipped,
+      (double)numCheckpointsAddedTotal / (numFunctionsAnnotated-numFunctionsNothingToAdd),
+      (double)numCheckpointsAddedTotal / (numFunctionsAnnotated+numFunctionsSkipped));
+  }
+
+  void visitLoop(Loop* curr) {
+    ++numCheckpointsAddedInFunction;
+
+    Builder builder(*getModule());
+    curr->body = builder.makeSequence(
+      builder.makeCall(GC_FUNC, {}, Type::none),
+      curr->body);
+  }
+
+private:
+  Expression* makeInstrumentGCCall(Expression* curr) {
+    Builder builder(*getModule());
+    return builder.makeSequence(
+      builder.makeCall(GC_FUNC, {}, Type::none),
+      curr);
+  }
+};
+
+Pass* createInstrumentCooperativeGCPass() { return new InstrumentCooperativeGC(); }
+
+} // namespace wasm
diff --git a/src/passes/SpillPointers.cpp b/src/passes/SpillPointers.cpp
index 3af7039cdd3..d1e6f96888d 100644
--- a/src/passes/SpillPointers.cpp
+++ b/src/passes/SpillPointers.cpp
@@ -27,6 +27,7 @@
 #include "abi/stack.h"
 #include "cfg/liveness-traversal.h"
+#include "ir/import-utils.h"
 #include "pass.h"
 #include "wasm-builder.h"
 #include "wasm.h"
@@ -66,11 +67,132 @@ struct SpillPointers
   void visitCall(Call* curr) { visitSpillable(curr); }
   void visitCallIndirect(CallIndirect* curr) { visitSpillable(curr); }
 
+  static bool isBlacklistedFunctionName(const Name &n)
+  {
+    const char *blacklisted[] = {
+      // These functions need to be blacklisted for correctness: we cannot
+      // let the spill pointers pass create a temp stack frame to these functions,
+      // or the stackRestore/stackAlloc functions would become no-ops.
+      "stackAlloc",
+      "stackRestore",
+
+      // This needs to be blacklisted, or core0.test_pthread_dylink_basics fails. Not immediately sure why that is.
+      "_emscripten_tls_init",
+
+      // The following functions are blacklisted for performance optimization reasons: (and testing/debugging reasons.. not intended to be final code in any form)
+      // These won't ever deal with managed GC pointers, so we can get slightly
+      // improved codegen performance by skipping these.
+      "stackSave",
+      "_emscripten_thread_init",
+      "dlmalloc",
+      "dlrealloc",
+      "dlfree",
+      "tmalloc_small",
+      "prepend_alloc",
+      "sbrk",
+      "__emscripten_init_main_thread",
+      "__memcpy",
+      "__funcs_on_exit",
+      "start_multithreaded_collection",
+      "gc_collect",
+      "_emscripten_proxy_main",
+      "emscripten_proxy_async",
+      "init_pthread_self",
+      "gc_participate_to_garbage_collection",
+      "find_and_run_a_finalizer",
+      "sweep",
+      "gc_enter_fenced_access",
+      "gc_malloc",
+      "realloc_zeroed",
+      "gc_uninterrupted_sleep",
+      "gc_sleep",
+      "mark_from_queue",
+      "wait_for_all_threads_finished_marking",
+      "mark",
+      "gc_sleep",
+      "gc_enter_fence_cb",
+      "internal_memalign",
+      "realloc_table",
+      "dlcalloc",
+      "find_insert_index",
+      "find_index",
+      "free_at_index",
+      "find_finalizer_index",
+      "mark_current_thread_stack",
+      "gc_dump",
+      "table_insert",
+      "gc_is_ptr",
+
+      "getpid",
+      "out",
+      "frexp",
+      "wctomb",
+      "fwrite",
+      "fputs",
+      "vfprintf",
+      "pop_arg_long_double",
+      "strnlen",
+      "wcrtomb",
+      "printf",
+      "puts",
+      "pop_arg",
+      "pad",
+      "fflush",
+      "printf_core",
+      "fmt_fp",
+    };
+    for(size_t i = 0; i < sizeof(blacklisted)/sizeof(blacklisted[0]); ++i)
+      if (n == blacklisted[i]) return true;
+    return false;
+  }
+
   // main entry point
+  int numPointersSpilledTotal = 0;
+  int numFunctionsPointersSpilled = 0, numFunctionsPointersNotSpilled = 0, numFunctionsBlacklisted = 0;
+  int numPointersSpilled;
+  int extraStackSpace;
   void doWalkFunction(Function* func) {
     super::doWalkFunction(func);
+    if (isBlacklistedFunctionName(func->name)
+        // The passes/SafeHeap.cpp pass replaces assignments with function calls, but the functions that call into SAFE_HEAP might not generate a stack frame (bump stack_pointer)
+        // for themselves if they don't call any other functions. This would cause pointer spilling inside SAFE_HEAP_ to stomp on the stack of the caller. So we must skip pointer
+        // spilling inside these dynamically code generated SAFE_HEAP functions (i.e. this is needed for correctness, but of course also good for performance in SAFE_HEAP mode)
+        || func->name.startsWith("SAFE_HEAP")
+
+        // The following are blacklisted for performance optimizations only:
+        || func->name.startsWith("em_task_")
+        || func->name.startsWith("_emscripten_thread_")
+        || func->name.startsWith("_pthread")
+        || func->name.startsWith("__pthread")
+        || func->name.startsWith("__wasm")
+        || func->name.startsWith("__wasi")
+        || func->name.startsWith("emscripten")
+        || func->name.startsWith("_emscripten")
+        || func->name.startsWith("pthread")
+        || func->name.startsWith("emmalloc")
+        || func->name.startsWith("legalstub$")
+        || func->name.startsWith("dynCall_")
+        || func->name.startsWith("__")
+
+)
+    {
+      ++numFunctionsBlacklisted;
+      return;
+    }
+    numPointersSpilledTotal += numPointersSpilled;
+    numPointersSpilled = 0;
+    extraStackSpace = 0;
     spillPointers();
+    if (numPointersSpilled || extraStackSpace)
+    {
+      ++numFunctionsPointersSpilled;
+      printf("SpillPointers: added %d pointer spill stores in function %s. Stack frame grew by %d bytes.\n",
+        numPointersSpilled, func->name.str.data(), extraStackSpace);
+    }
+    else
+      ++numFunctionsPointersNotSpilled;
   }
 
   // map pointers to their offset in the spill area
@@ -78,7 +200,25 @@ struct SpillPointers
   Type pointerType;
 
+  /*
+  void visitModule(Module* curr) {
+    printf("SpillPointers summary: Added a total of %d spill stores to %d/%d (%.2f%%) functions. (%d had nothing to add, %d were blacklisted). Avg spill stores: %.3f per added function, %.3f per all functions in program.\n",
+      numPointersSpilledTotal, numFunctionsPointersSpilled, numFunctionsPointersSpilled + numFunctionsBlacklisted + numFunctionsPointersNotSpilled,
+      numFunctionsPointersSpilled * 100.0 / (numFunctionsPointersSpilled + numFunctionsBlacklisted + numFunctionsPointersNotSpilled),
+      numFunctionsPointersNotSpilled, numFunctionsBlacklisted,
+      (double)numPointersSpilledTotal / numFunctionsPointersSpilled,
+      (double)numPointersSpilledTotal / (numFunctionsPointersSpilled + numFunctionsBlacklisted + numFunctionsPointersNotSpilled));
+  }
+  */
+
   void spillPointers() {
+    std::string HANDLE_STACK_OVERFLOW =
+      getPassOptions().getArgumentOrDefault("stack-check-handler", "");
+
+    ImportInfo info(*getModule());
+    Function* stack_overflow_check =
+      info.getImportedFunction(ENV, HANDLE_STACK_OVERFLOW);
+
     pointerType = getModule()->memories[0]->indexType;
 
     // we only care about possible pointers
@@ -119,6 +259,27 @@ struct SpillPointers
         } else if (action.isSet()) {
           live.erase(action.index);
         } else if (action.isOther()) {
+          auto* pointer = actualPointers[action.origin];
+          auto* call = *pointer;
+          if (call->type == Type::unreachable) {
+            continue; // the call is never reached anyhow, ignore
+          }
+
+          // Do not spill pointers right before a call to function
+          // __handle_stack_overflow(). This does improve performance, but is
+          // primarily needed for correctness, to avoid creating a new stack
+          // frame inside function stackAlloc(), which should bump the stack
+          // pointer of the caller's stack frame, hence it cannot generate a
+          // stack frame inside the function itself. By default stackAlloc()
+          // does not call any other functions except __handle_stack_overflow().
+          // (same for stackRestore() and stackSave()).
+          Call* c = call->dynCast<Call>();
+          if (c && (c->target == HANDLE_STACK_OVERFLOW ||
+              (stack_overflow_check &&
+               c->target == stack_overflow_check->name))) {
+            continue;
+          }
+
           std::vector<Index> toSpill;
           for (auto index : live) {
             if (pointerMap.count(index) > 0) {
@@ -135,7 +296,6 @@ struct SpillPointers
             spilled = true;
           }
           // the origin was seen at walk, but the thing may have moved
-          auto* pointer = actualPointers[action.origin];
           spillPointersAroundCall(
             pointer, toSpill, spillLocal, pointerMap, func, getModule());
         }
@@ -146,6 +306,13 @@ struct SpillPointers
       }
       if (spilled) {
         // get the stack space, and set the local to it
+        auto* stackPointer = getStackPointerGlobal(*getModule());
+        if (!stackPointer) {
+          printf("SpillPointers: unable to find stack pointer for function %s\n", func->name.str.data());
+          return;
+        }
+
+        extraStackSpace += pointerType.getByteSize() * pointerMap.size();
         ABI::getStackSpace(spillLocal,
                            func,
                            pointerType.getByteSize() * pointerMap.size(),
@@ -160,9 +327,6 @@ struct SpillPointers
                              Function* func,
                              Module* module) {
     auto* call = *origin;
-    if (call->type == Type::unreachable) {
-      return; // the call is never reached anyhow, ignore
-    }
     Builder builder(*module);
     auto* block = builder.makeBlock();
     // move the operands into locals, as we must spill after they are executed
@@ -204,6 +368,8 @@ struct SpillPointers
     block->list.push_back(call);
     block->finalize();
     *origin = block;
+
+    numPointersSpilled += toSpill.size();
   }
 };
 
diff --git a/src/passes/pass.cpp b/src/passes/pass.cpp
index 1fe81cbfc7d..5b7cae5ab0b 100644
--- a/src/passes/pass.cpp
+++ b/src/passes/pass.cpp
@@ -233,6 +233,9 @@ void PassRegistry::registerPasses() {
   registerPass("i64-to-i32-lowering",
                "lower all uses of i64s to use i32s instead",
                createI64ToI32LoweringPass);
+  registerPass("instrument-cooperative-gc",
+               "instrument the build with cooperative synchronization points for multithreaded garbage collection",
+               createInstrumentCooperativeGCPass);
   registerPass(
     "instrument-locals",
     "instrument the build with code to intercept all loads and stores",
diff --git a/src/passes/passes.h b/src/passes/passes.h
index c3ab7773f1e..3bdc8244e10 100644
--- a/src/passes/passes.h
+++ b/src/passes/passes.h
@@ -74,6 +74,7 @@ Pass* createLocalCSEPass();
 Pass* createLocalSubtypingPass();
 Pass* createLogExecutionPass();
 Pass* createIntrinsicLoweringPass();
+Pass* createInstrumentCooperativeGCPass();
 Pass* createInstrumentLocalsPass();
 Pass* createInstrumentMemoryPass();
 Pass* createLoopInvariantCodeMotionPass();