diff -r 000000000000 -r 6474c204b198 js/src/vm/ForkJoin.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/js/src/vm/ForkJoin.h Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,559 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef vm_ForkJoin_h
+#define vm_ForkJoin_h
+
+#include "mozilla/ThreadLocal.h"
+
+#include "jscntxt.h"
+
+#include "gc/GCInternals.h"
+
+#include "jit/Ion.h"
+
+///////////////////////////////////////////////////////////////////////////
+// Read Me First
+//
+// The ForkJoin abstraction:
+// -------------------------
+//
+// This is the building block for executing multi-threaded JavaScript with
+// shared memory (as distinct from Web Workers). The idea is that you have
+// some (typically data-parallel) operation which you wish to execute in
+// parallel across as many threads as you have available.
+//
+// The ForkJoin abstraction is intended to be used by self-hosted code
+// to enable parallel execution. At the top-level, it consists of a native
+// function (exposed as the ForkJoin intrinsic) that is used like so:
+//
+//     ForkJoin(func, sliceStart, sliceEnd, mode)
+//
+// The intention of this statement is to start some number (usually the
+// number of hardware threads) of copies of |func()| running in parallel. Each
+// copy will then do a portion of the total work, depending on
+// work-stealing-based load balancing.
+//
+// Typically, each of the N slices runs in a different worker thread, but that
+// is not something you should rely upon---if work stealing is enabled it
+// could be that a single worker thread winds up handling multiple slices.
+//
+// The second and third arguments, |sliceStart| and |sliceEnd|, are the slice
+// boundaries. These numbers must each fit inside a uint16_t.
+//
+// The fourth argument, |mode|, is an internal mode integer giving finer
+// control over the behavior of ForkJoin. See the |ForkJoinMode| enum.
+//
+// func() should expect the following arguments:
+//
+//     func(workerId, sliceStart, sliceEnd)
+//
+// The |workerId| parameter is the id of the worker executing the function. It
+// is 0 in sequential mode.
+//
+// The |sliceStart| and |sliceEnd| parameters are the current bounds that the
+// worker is handling. In parallel execution, these parameters are not
+// used. In sequential execution, they tell the worker what slices should be
+// processed. During the warm-up phase, sliceEnd == sliceStart + 1.
+//
+// |func| can keep asking for more work from the scheduler by calling the
+// intrinsic |GetForkJoinSlice(sliceStart, sliceEnd, id)|. When there are no
+// more slices to hand out, ThreadPool::MAX_SLICE_ID is returned as a sentinel
+// value. By exposing this function as an intrinsic, we reduce the number of
+// JS-C++ boundary crossings incurred by work stealing, which may have many
+// slices.
+//
+// In sequential execution, |func| should return the maximum computed slice id
+// S for which all slices with id < S have already been processed. This is so
+// ThreadPool can track the leftmost completed slice id to maintain
+// determinism. Slices which have been completed in sequential execution
+// cannot be re-run in parallel execution.
+//
+// In parallel execution, |func| MUST PROCESS ALL SLICES BEFORE RETURNING!
+// Not doing so is an error and is protected by debug asserts in ThreadPool.
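+//
+// As an illustrative sketch only (|ProcessSlice| and |MAX_SLICE_ID| here are
+// stand-ins, not the actual self-hosted names), a parallel-mode kernel that
+// drains slices via the intrinsic might look like:
+//
+//     function kernel(workerId, sliceStart, sliceEnd) {
+//       var sliceId;
+//       // Keep requesting slices until the sentinel comes back.
+//       while ((sliceId = GetForkJoinSlice(sliceStart, sliceEnd, workerId)) !=
+//              MAX_SLICE_ID) {
+//         ProcessSlice(sliceId); // must not write to shared state
+//       }
+//     }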
+//
+// Warmups and Sequential Fallbacks
+// --------------------------------
+//
+// ForkJoin can only execute code in parallel when it has been
+// Ion-compiled in Parallel Execution Mode. ForkJoin handles this part
+// for you. However, because Ion relies on having decent type
+// information available, it is necessary to run the code sequentially
+// for a few iterations first to get the various type sets "primed"
+// with reasonable information. We try to make do with just a few
+// runs, under the hypothesis that parallel execution code reaches
+// type stability relatively quickly.
+//
+// The general strategy of ForkJoin is as follows:
+//
+// - If the code has not yet been run, invoke `func` sequentially with
+//   warmup set to true. When warmup is true, `func` should try to
+//   do less work than normal---just enough to prime type sets. (See
+//   ParallelArray.js for a discussion of how specifically we do this
+//   in the case of ParallelArray.)
+//
+// - Try to execute the code in parallel. Parallel execution mode has
+//   three possible results: success, fatal error, or bailout. If a
+//   bailout occurs, it means that the code attempted some action
+//   which is not possible in parallel mode. This might be a
+//   modification to shared state, but it might also be that it
+//   attempted to take some theoretically pure action that has not been
+//   made threadsafe (yet?).
+//
+// - If parallel execution is successful, ForkJoin returns true.
+//
+// - If parallel execution results in a fatal error, ForkJoin returns false.
+//
+// - If parallel execution results in a *bailout*, this is when things
+//   get interesting. In that case, the semantics of parallel
+//   execution guarantee us that no visible side effects have occurred
+//   (unless they were performed with the intrinsic
+//   |UnsafePutElements()|, which can only be used in self-hosted
+//   code). We therefore reinvoke |func()| but with warmup set to
+//   true. The idea here is that often parallel bailouts result from
+//   a failed type guard or other similar assumption, so rerunning the
+//   warmup sequentially gives us a chance to recompile with more
+//   data. Because warmup is true, we do not expect this sequential
+//   call to process all remaining data, just a chunk. After this
+//   recovery execution is complete, we again attempt parallel
+//   execution.
+//
+// - If more than a fixed number of bailouts occur, we give up on
+//   parallelization and just invoke |func()| N times in a row (once
+//   for each worker) but with |warmup| set to false. This overall
+//   strategy is rendered as pseudo-code below.
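+//
+// A hedged pseudo-code rendering of that strategy (the helper names are
+// illustrative, not the actual ones used in ForkJoin.cpp):
+//
+//     warmup(func);                      // prime type sets sequentially
+//     for (var i = 0; i < MAX_BAILOUTS; i++) {
+//       var r = tryParallel(func);       // compile (if needed) and run
+//       if (r == SUCCESS) return true;
+//       if (r == FATAL)   return false;
+//       warmup(func);                    // bailout: re-warm and recompile
+//     }
+//     return runSequentially(func);      // too many bailouts; give up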
+//
+// Interrupts:
+//
+// During parallel execution, |cx.check()| must be periodically invoked to
+// check for interrupts. This is automatically done by the Ion-generated
+// code. If an interrupt has been requested, |cx.check()| aborts parallel
+// execution.
+//
+// Transitive compilation:
+//
+// One of the challenges for parallel compilation is that we
+// (currently) have to abort when we encounter an uncompiled script.
+// Therefore, we try to compile everything that might be needed
+// beforehand. The exact strategy is described in `ParallelDo::apply()`
+// in ForkJoin.cpp, but at the highest level the idea is:
+//
+// 1. We maintain a flag on every script telling us if that script and
+//    its transitive callees are believed to be compiled. If that flag
+//    is set, we can skip the initial compilation.
+// 2. Otherwise, we maintain a worklist that begins with the main
+//    script. We compile it and then examine the generated parallel IonScript,
+//    which will have a list of callees. We enqueue those. Some of these
+//    compilations may take place off the main thread, in which case
+//    we will run warmup iterations while we wait for them to complete.
+// 3. If the warmup iterations finish all the work, we're done.
+// 4. If compilations fail, we fall back to sequential execution.
+// 5. Otherwise, we will try running in parallel once we're all done.
+//
+// Bailout tracing and recording:
+//
+// When a bailout occurs, we record a bit of state so that we can
+// recover with grace. Each |ForkJoinContext| has a pointer to a
+// |ParallelBailoutRecord| pre-allocated for this purpose. This
+// structure is used to record the cause of the bailout, the JSScript
+// which was executing, as well as the location in the source where
+// the bailout occurred (in principle, we can record a full stack
+// trace, but right now we only record the top-most frame). Note that
+// the error location might not be in the same JSScript as the one
+// which was executing due to inlining.
+//
+// Garbage collection and allocation:
+//
+// Code which executes on these parallel threads must be very careful
+// with respect to garbage collection and allocation. The typical
+// allocation paths are UNSAFE in parallel code because they access
+// shared state (the compartment's arena lists and so forth) without
+// any synchronization. They can also trigger GC in an ad-hoc way.
+//
+// To deal with this, the forkjoin code creates a distinct |Allocator|
+// object for each slice. You can access the appropriate object via
+// the |ForkJoinContext| object that is provided to the callbacks. Once
+// the execution is complete, all the objects found in these distinct
+// |Allocator|s are merged back into the main compartment lists and
+// things proceed normally.
+//
+// In Ion-generated code, we will do allocation through the
+// |Allocator| found in |ForkJoinContext| (which is obtained via TLS).
+// Also, no write barriers are emitted. Conceptually, we should never
+// need a write barrier because we only permit writes to objects that
+// are newly allocated, and such objects are always black (to use
+// incremental GC terminology). However, to be safe, we also block
+// upon entering a parallel section to ensure that any concurrent
+// marking or incremental GC has completed.
+//
+// In the future, it should be possible to lift the restriction that
+// we must block until incremental GC has completed and also to permit
+// GC during parallel execution. But we're not there yet.
+//
+// Load balancing (work stealing):
+//
+// The ForkJoin job is dynamically divided into a fixed number of slices,
+// and is submitted for parallel execution in the pool. When the number
+// of slices is big enough (typically greater than the number of workers
+// in the pool) and the workload is unbalanced, each worker thread
+// will perform load balancing through work stealing. The number
+// of slices is computed by the self-hosted function |ComputeNumSlices|,
+// which can be used to determine how many slices will be executed by the
+// runtime for an array of the given size.
+//
+// Current Limitations:
+//
+// - The API does not support recursive or nested use. That is, the
+//   JavaScript function given to |ForkJoin| should not itself invoke
+//   |ForkJoin()|. Instead, use the intrinsic |InParallelSection()| to
+//   check for recursive use and execute a sequential fallback, as in
+//   the sketch below.
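+//
+//   A hedged sketch of such a guard (|MapPar| and |MapSeq| are illustrative
+//   names, not the actual self-hosted functions):
+//
+//       function MapPar(array, kernel, mode) {
+//         if (InParallelSection())
+//           return MapSeq(array, kernel);  // nested use: run sequentially
+//         return ForkJoin(kernel, 0, ComputeNumSlices(array.length), mode);
+//       }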
+//
+///////////////////////////////////////////////////////////////////////////
+
+namespace js {
+
+class ForkJoinActivation : public Activation
+{
+    uint8_t *prevIonTop_;
+
+    // We ensure that incremental GC is finished before we enter into a fork
+    // join section, but the runtime/zone might still be marked as needing
+    // barriers due to being in the middle of verifying barriers. Pause
+    // verification during the fork join section.
+    gc::AutoStopVerifyingBarriers av_;
+
+  public:
+    ForkJoinActivation(JSContext *cx);
+    ~ForkJoinActivation();
+};
+
+class ForkJoinContext;
+
+bool ForkJoin(JSContext *cx, CallArgs &args);
+
+struct IonLIRTraceData {
+    uint32_t blockIndex;
+    uint32_t lirIndex;
+    uint32_t execModeInt;
+    const char *lirOpName;
+    const char *mirOpName;
+    JSScript *script;
+    jsbytecode *pc;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// Bailout tracking
+
+enum ParallelBailoutCause {
+    ParallelBailoutNone,
+
+    // Compiler returned Method_Skipped.
+    ParallelBailoutCompilationSkipped,
+
+    // Compiler returned Method_CantCompile.
+    ParallelBailoutCompilationFailure,
+
+    // The periodic interrupt failed, which can mean that another thread
+    // canceled execution, the user interrupted us, etc.
+    ParallelBailoutInterrupt,
+
+    // An IC update failed.
+    ParallelBailoutFailedIC,
+
+    // The heap-busy flag was set during an interrupt.
+    ParallelBailoutHeapBusy,
+
+    ParallelBailoutMainScriptNotPresent,
+    ParallelBailoutCalledToUncompiledScript,
+    ParallelBailoutIllegalWrite,
+    ParallelBailoutAccessToIntrinsic,
+    ParallelBailoutOverRecursed,
+    ParallelBailoutOutOfMemory,
+    ParallelBailoutUnsupported,
+    ParallelBailoutUnsupportedVM,
+    ParallelBailoutUnsupportedStringComparison,
+    ParallelBailoutRequestedGC,
+    ParallelBailoutRequestedZoneGC,
+};
+
+struct ParallelBailoutTrace {
+    JSScript *script;
+    jsbytecode *bytecode;
+};
+
+// See the "Bailout tracing and recording" section in the comment above.
+struct ParallelBailoutRecord {
+    JSScript *topScript;
+    ParallelBailoutCause cause;
+
+    // Eventually we will support deeper traces,
+    // but for now we gather at most a single frame.
+    static const uint32_t MaxDepth = 1;
+    uint32_t depth;
+    ParallelBailoutTrace trace[MaxDepth];
+
+    void init(JSContext *cx);
+    void reset(JSContext *cx);
+    void setCause(ParallelBailoutCause cause,
+                  JSScript *outermostScript = nullptr,  // inliner (if applicable)
+                  JSScript *currentScript = nullptr,    // inlinee (if applicable)
+                  jsbytecode *currentPc = nullptr);
+    void updateCause(ParallelBailoutCause cause,
+                     JSScript *outermostScript,
+                     JSScript *currentScript,
+                     jsbytecode *currentPc);
+    void addTrace(JSScript *script,
+                  jsbytecode *pc);
+};
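+
+// As a hedged illustration (not a verbatim call site), a VM function that
+// encounters an unsupported operation might record its bailout like so,
+// given a |ForkJoinContext *cx| and the current script and pc:
+//
+//     cx->bailoutRecord->setCause(ParallelBailoutUnsupportedVM,
+//                                 nullptr, script, pc);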
+
+struct ForkJoinShared;
+
+class ForkJoinContext : public ThreadSafeContext
+{
+  public:
+    // Bailout record used to record the reason this thread stopped executing.
+    ParallelBailoutRecord *const bailoutRecord;
+
+#ifdef DEBUG
+    // Records the last instruction to execute on this thread.
+    IonLIRTraceData traceData;
+
+    // The maximum worker id.
+    uint32_t maxWorkerId;
+#endif
+
+    // When we run a par operation like mapPar, we create an out pointer
+    // into a specific region of the destination buffer. Even though the
+    // destination buffer is not thread-local, it is permissible to write into
+    // it via the handles provided. These two fields identify the memory
+    // region where writes are allowed so that the write guards can test for
+    // it.
+    //
+    // Note: we only permit writes into the *specific region* that the user
+    // is supposed to write. Normally, they only have access to this region
+    // anyhow. But due to sequential fallback it is possible for handles into
+    // other regions to escape into global variables in the sequential
+    // execution and then get accessed by later parallel sections. Thus we
+    // must be careful and ensure that the write is going through a handle
+    // into the correct *region* of the buffer.
+    uint8_t *targetRegionStart;
+    uint8_t *targetRegionEnd;
+
+    ForkJoinContext(PerThreadData *perThreadData, ThreadPoolWorker *worker,
+                    Allocator *allocator, ForkJoinShared *shared,
+                    ParallelBailoutRecord *bailoutRecord);
+
+    // Get the worker id. The main thread by convention has the id of the max
+    // worker thread id + 1.
+    uint32_t workerId() const { return worker_->id(); }
+
+    // Get a slice of work for the worker associated with the context.
+    bool getSlice(uint16_t *sliceId) { return worker_->getSlice(this, sliceId); }
+
+    // True if this is the main thread, false if it is one of the parallel workers.
+    bool isMainThread() const;
+
+    // When the code would normally trigger a GC, we don't trigger it
+    // immediately but instead record that request here. This will
+    // cause |ExecuteForkJoinOp()| to invoke |TriggerGC()| or
+    // |TriggerCompartmentGC()| as appropriate once the parallel
+    // section is complete. This is done because those routines do
+    // various preparations that are not thread-safe, and because the
+    // full set of arenas is not available until the end of the
+    // parallel section.
+    void requestGC(JS::gcreason::Reason reason);
+    void requestZoneGC(JS::Zone *zone, JS::gcreason::Reason reason);
+
+    // Set the fatal flag for the next abort. Used to distinguish retry or
+    // fatal aborts from VM functions.
+    bool setPendingAbortFatal(ParallelBailoutCause cause);
+
+    // Reports an unsupported operation, returning false if we are reporting
+    // an error; otherwise the warning is dropped on the floor.
+    bool reportError(ParallelBailoutCause cause, unsigned report) {
+        if (report & JSREPORT_ERROR)
+            return setPendingAbortFatal(cause);
+        return true;
+    }
+
+    // During the parallel phase, this method should be invoked
+    // periodically, for example on every backedge, similar to the
+    // interrupt check. If it returns false, then the parallel phase
+    // has been aborted and so you should bail out. The function may
+    // also rendezvous to perform GC or do other similar things.
+    //
+    // This function is guaranteed to have no effect if
+    // runtime()->interruptPar is zero. Ion-generated code takes
+    // advantage of this by inlining the check on that flag before
+    // actually calling this function. If this function ends up
+    // getting called a lot from outside ion code, we can refactor
+    // it into an inlined version with this check that calls a slower
+    // version.
+    bool check();
+
+    // Be wary, the runtime is shared between all threads!
+    JSRuntime *runtime();
+
+    // Acquire and release the JSContext from the runtime.
+    JSContext *acquireJSContext();
+    void releaseJSContext();
+    bool hasAcquiredJSContext() const;
+
+    // Check the current state of parallel execution.
+    static inline ForkJoinContext *current();
+
+    // Initializes the thread-local state.
+    static bool initialize();
+
+    // Used in inlining GetForkJoinSlice.
+    static size_t offsetOfWorker() {
+        return offsetof(ForkJoinContext, worker_);
+    }
+
+  private:
+    friend class AutoSetForkJoinContext;
+
+    // Initialized by initialize().
+    static mozilla::ThreadLocal<ForkJoinContext*> tlsForkJoinContext;
+
+    ForkJoinShared *const shared_;
+
+    ThreadPoolWorker *worker_;
+
+    bool acquiredJSContext_;
+
+    // ForkJoinContext is allocated on the stack. It would be dangerous to GC
+    // with it live because of the GC pointer fields stored in the context.
+    JS::AutoAssertNoGC nogc_;
+};
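+
+// A minimal sketch (illustrative only) of how a worker might drain slices
+// through a |ForkJoinContext|, checking for interrupts as it goes;
+// |processSlice| is a hypothetical per-slice routine:
+//
+//     bool executeSlices(ForkJoinContext *cx) {
+//         uint16_t sliceId;
+//         while (cx->getSlice(&sliceId)) {  // false once no slices remain
+//             if (!cx->check())             // parallel phase aborted
+//                 return false;
+//             processSlice(cx, sliceId);
+//         }
+//         return true;
+//     }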
+
+// Locks a JSContext for its scope. Be very careful, because locking a
+// JSContext does *not* allow you to safely mutate the data in the
+// JSContext unless you can guarantee that any of the other threads
+// that want to access that data will also acquire the lock, which is
+// generally not the case. For example, the lock is used in the IC
+// code to allow us to atomically patch up the dispatch table, but we
+// must be aware that other threads may be reading from the table even
+// as we write to it (though they cannot be writing, since they must
+// hold the lock to write).
+class LockedJSContext
+{
+#if defined(JS_THREADSAFE) && defined(JS_ION)
+    ForkJoinContext *cx_;
+#endif
+    JSContext *jscx_;
+
+  public:
+    LockedJSContext(ForkJoinContext *cx)
+#if defined(JS_THREADSAFE) && defined(JS_ION)
+      : cx_(cx),
+        jscx_(cx->acquireJSContext())
+#else
+      : jscx_(nullptr)
+#endif
+    { }
+
+    ~LockedJSContext() {
+#if defined(JS_THREADSAFE) && defined(JS_ION)
+        cx_->releaseJSContext();
+#endif
+    }
+
+    operator JSContext *() { return jscx_; }
+    JSContext *operator->() { return jscx_; }
+};
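+
+// A hedged usage sketch: the wrapper is RAII-style, acquiring the JSContext
+// for the duration of a scope (the surrounding function is hypothetical):
+//
+//     void patchDispatchTable(ForkJoinContext *cx) {
+//         LockedJSContext ncx(cx);  // acquires; releases at end of scope
+//         // ... use ncx as a JSContext * while the lock is held ...
+//     }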
+
+bool InExclusiveParallelSection();
+
+bool ParallelTestsShouldPass(JSContext *cx);
+
+void RequestInterruptForForkJoin(JSRuntime *rt, JSRuntime::InterruptMode mode);
+
+bool intrinsic_SetForkJoinTargetRegion(JSContext *cx, unsigned argc, Value *vp);
+extern const JSJitInfo intrinsic_SetForkJoinTargetRegionInfo;
+
+bool intrinsic_ClearThreadLocalArenas(JSContext *cx, unsigned argc, Value *vp);
+extern const JSJitInfo intrinsic_ClearThreadLocalArenasInfo;
+
+///////////////////////////////////////////////////////////////////////////
+// Debug Spew
+
+namespace jit {
+    class MDefinition;
+}
+
+namespace parallel {
+
+enum ExecutionStatus {
+    // Parallel or sequential execution terminated in a fatal way; the
+    // operation failed.
+    ExecutionFatal,
+
+    // Parallel execution failed and so we fell back to sequential.
+    ExecutionSequential,
+
+    // We completed the work in sequential mode before parallel compilation
+    // completed.
+    ExecutionWarmup,
+
+    // Parallel execution was successful after some number of bailouts.
+    ExecutionParallel
+};
+
+enum SpewChannel {
+    SpewOps,
+    SpewCompile,
+    SpewBailouts,
+    NumSpewChannels
+};
+
+#if defined(DEBUG) && defined(JS_THREADSAFE) && defined(JS_ION)
+
+bool SpewEnabled(SpewChannel channel);
+void Spew(SpewChannel channel, const char *fmt, ...);
+void SpewBeginOp(JSContext *cx, const char *name);
+void SpewBailout(uint32_t count, HandleScript script, jsbytecode *pc,
+                 ParallelBailoutCause cause);
+ExecutionStatus SpewEndOp(ExecutionStatus status);
+void SpewBeginCompile(HandleScript script);
+jit::MethodStatus SpewEndCompile(jit::MethodStatus status);
+void SpewMIR(jit::MDefinition *mir, const char *fmt, ...);
+void SpewBailoutIR(IonLIRTraceData *data);
+
+#else
+
+static inline bool SpewEnabled(SpewChannel channel) { return false; }
+static inline void Spew(SpewChannel channel, const char *fmt, ...) { }
+static inline void SpewBeginOp(JSContext *cx, const char *name) { }
+static inline void SpewBailout(uint32_t count, HandleScript script,
+                               jsbytecode *pc, ParallelBailoutCause cause) { }
+static inline ExecutionStatus SpewEndOp(ExecutionStatus status) { return status; }
+static inline void SpewBeginCompile(HandleScript script) { }
+#ifdef JS_ION
+static inline jit::MethodStatus SpewEndCompile(jit::MethodStatus status) { return status; }
+static inline void SpewMIR(jit::MDefinition *mir, const char *fmt, ...) { }
+#endif
+static inline void SpewBailoutIR(IonLIRTraceData *data) { }
+
+#endif // DEBUG && JS_THREADSAFE && JS_ION
+
+} // namespace parallel
+} // namespace js
+
+/* static */ inline js::ForkJoinContext *
+js::ForkJoinContext::current()
+{
+    return tlsForkJoinContext.get();
+}
+
+namespace js {
+
+static inline bool
+InParallelSection()
+{
+    return ForkJoinContext::current() != nullptr;
+}
+
+} // namespace js
+
+#endif /* vm_ForkJoin_h */