diff -r 000000000000 -r 6474c204b198 js/src/vm/ForkJoin.h
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/js/src/vm/ForkJoin.h Wed Dec 31 06:09:35 2014 +0100
@@ -0,0 +1,559 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
+ * vim: set ts=8 sts=4 et sw=4 tw=99:
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#ifndef vm_ForkJoin_h
+#define vm_ForkJoin_h
+
+#include "mozilla/ThreadLocal.h"
+
+#include "jscntxt.h"
+
+#include "gc/GCInternals.h"
+
+#include "jit/Ion.h"
+
+///////////////////////////////////////////////////////////////////////////
+// Read Me First
+//
+// The ForkJoin abstraction:
+// -------------------------
+//
+// This is the building block for executing multi-threaded JavaScript with
+// shared memory (as distinct from Web Workers). The idea is that you have
+// some (typically data-parallel) operation which you wish to execute in
+// parallel across as many threads as you have available.
+//
+// The ForkJoin abstraction is intended to be used by self-hosted code
+// to enable parallel execution. At the top-level, it consists of a native
+// function (exposed as the ForkJoin intrinsic) that is used like so:
+//
+//     ForkJoin(func, sliceStart, sliceEnd, mode)
+//
+// The intention of this statement is to start some number (usually the
+// number of hardware threads) of copies of |func()| running in parallel. Each
+// copy will then do a portion of the total work, depending on
+// work-stealing-based load balancing.
+//
+// Typically, each of the N slices runs in a different worker thread, but that
+// is not something you should rely upon---if work stealing is enabled it
+// could be that a single worker thread winds up handling multiple slices.
+//
+// The second and third arguments, |sliceStart| and |sliceEnd|, are the slice
+// boundaries. These numbers must each fit inside a uint16_t.
+//
+// The fourth argument, |mode|, is an internal mode integer giving finer
+// control over the behavior of ForkJoin. See the |ForkJoinMode| enum.
+//
+// func() should expect the following arguments:
+//
+//     func(workerId, sliceStart, sliceEnd)
+//
+// The |workerId| parameter is the id of the worker executing the function. It
+// is 0 in sequential mode.
+//
+// The |sliceStart| and |sliceEnd| parameters are the current bounds that the
+// worker is handling. In parallel execution, these parameters are not
+// used. In sequential execution, they tell the worker what slices should be
+// processed. During the warm-up phase, sliceEnd == sliceStart + 1.
+//
+// |func| can keep asking for more work from the scheduler by calling the
+// intrinsic |GetForkJoinSlice(sliceStart, sliceEnd, id)|. When there are no
+// more slices to hand out, ThreadPool::MAX_SLICE_ID is returned as a sentinel
+// value. By exposing this function as an intrinsic, we reduce the number of
+// JS-C++ boundary crossings incurred by work stealing, which may have many
+// slices.
+//
+// In sequential execution, |func| should return the maximum computed slice id
+// S for which all slices with id < S have already been processed. This is so
+// ThreadPool can track the leftmost completed slice id to maintain
+// determinism. Slices which have been completed in sequential execution
+// cannot be re-run in parallel execution.
+//
+// In parallel execution, |func| MUST PROCESS ALL SLICES BEFORE RETURNING!
+// Not doing so is an error and is protected by debug asserts in ThreadPool.
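+//
+// As an illustrative sketch only (|ProcessSlice| and |MAX_SLICE_ID| here are
+// stand-ins, not the actual self-hosted names), a parallel-mode kernel that
+// drains slices via the intrinsic might look like:
+//
+//     function kernel(workerId, sliceStart, sliceEnd) {
+//       var sliceId;
+//       // Keep requesting slices until the sentinel comes back.
+//       while ((sliceId = GetForkJoinSlice(sliceStart, sliceEnd, workerId)) !=
+//              MAX_SLICE_ID) {
+//         ProcessSlice(sliceId); // must not write to shared state
+//       }
+//     }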
+//
+// Warmups and Sequential Fallbacks
+// --------------------------------
+//
+// ForkJoin can only execute code in parallel when it has been
+// Ion-compiled in Parallel Execution Mode. ForkJoin handles this part
+// for you. However, because Ion relies on having decent type
+// information available, it is necessary to run the code sequentially
+// for a few iterations first to get the various type sets "primed"
+// with reasonable information. We try to make do with just a few
+// runs, under the hypothesis that parallel execution code reaches
+// type stability relatively quickly.
+//
+// The general strategy of ForkJoin is as follows:
+//
+// - If the code has not yet been run, invoke `func` sequentially with
+//   warmup set to true. When warmup is true, `func` should try to
+//   do less work than normal---just enough to prime type sets. (See
+//   ParallelArray.js for a discussion of how specifically we do this
+//   in the case of ParallelArray.)
+//
+// - Try to execute the code in parallel. Parallel execution mode has
+//   three possible results: success, fatal error, or bailout. If a
+//   bailout occurs, it means that the code attempted some action
+//   which is not possible in parallel mode. This might be a
+//   modification to shared state, but it might also be that it
+//   attempted to take some theoretically pure action that has not been
+//   made threadsafe (yet?).
+//
+// - If parallel execution is successful, ForkJoin returns true.
+//
+// - If parallel execution results in a fatal error, ForkJoin returns false.
+//
+// - If parallel execution results in a *bailout*, this is when things
+//   get interesting. In that case, the semantics of parallel
+//   execution guarantee us that no visible side effects have occurred
+//   (unless they were performed with the intrinsic
+//   |UnsafePutElements()|, which can only be used in self-hosted
+//   code). We therefore reinvoke |func()| but with warmup set to
+//   true. The idea here is that often parallel bailouts result from
+//   a failed type guard or other similar assumption, so rerunning the
+//   warmup sequentially gives us a chance to recompile with more
+//   data. Because warmup is true, we do not expect this sequential
+//   call to process all remaining data, just a chunk. After this
+//   recovery execution is complete, we again attempt parallel
+//   execution.
+//
+// - If more than a fixed number of bailouts occur, we give up on
+//   parallelization and just invoke |func()| N times in a row (once
+//   for each worker) but with |warmup| set to false. This overall
+//   strategy is rendered as pseudo-code below.
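+//
+// A hedged pseudo-code rendering of that strategy (the helper names are
+// illustrative, not the actual ones used in ForkJoin.cpp):
+//
+//     warmup(func);                      // prime type sets sequentially
+//     for (var i = 0; i < MAX_BAILOUTS; i++) {
+//       var r = tryParallel(func);       // compile (if needed) and run
+//       if (r == SUCCESS) return true;
+//       if (r == FATAL)   return false;
+//       warmup(func);                    // bailout: re-warm and recompile
+//     }
+//     return runSequentially(func);      // too many bailouts; give up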
+//
+// Interrupts:
+//
+// During parallel execution, |cx.check()| must be periodically invoked to
+// check for interrupts. This is automatically done by the Ion-generated
+// code. If an interrupt has been requested, |cx.check()| aborts parallel
+// execution.
+//
+// Transitive compilation:
+//
+// One of the challenges for parallel compilation is that we
+// (currently) have to abort when we encounter an uncompiled script.
+// Therefore, we try to compile everything that might be needed
+// beforehand. The exact strategy is described in `ParallelDo::apply()`
+// in ForkJoin.cpp, but at the highest level the idea is:
+//
+// 1. We maintain a flag on every script telling us if that script and
+//    its transitive callees are believed to be compiled. If that flag
+//    is set, we can skip the initial compilation.
+// 2. Otherwise, we maintain a worklist that begins with the main
+//    script. We compile it and then examine the generated parallel IonScript,
+//    which will have a list of callees. We enqueue those. Some of these
+//    compilations may take place off the main thread, in which case
+//    we will run warmup iterations while we wait for them to complete.
+// 3. If the warmup iterations finish all the work, we're done.
+// 4. If compilations fail, we fall back to sequential execution.
+// 5. Otherwise, we will try running in parallel once we're all done.
+//
+// Bailout tracing and recording:
+//
+// When a bailout occurs, we record a bit of state so that we can
+// recover with grace. Each |ForkJoinContext| has a pointer to a
+// |ParallelBailoutRecord| pre-allocated for this purpose. This
+// structure is used to record the cause of the bailout, the JSScript
+// which was executing, as well as the location in the source where
+// the bailout occurred (in principle, we can record a full stack
+// trace, but right now we only record the top-most frame). Note that
+// the error location might not be in the same JSScript as the one
+// which was executing due to inlining.
+//
+// Garbage collection and allocation:
+//
+// Code which executes on these parallel threads must be very careful
+// with respect to garbage collection and allocation. The typical
+// allocation paths are UNSAFE in parallel code because they access
+// shared state (the compartment's arena lists and so forth) without
+// any synchronization. They can also trigger GC in an ad-hoc way.
+//
+// To deal with this, the forkjoin code creates a distinct |Allocator|
+// object for each slice. You can access the appropriate object via
+// the |ForkJoinContext| object that is provided to the callbacks. Once
+// the execution is complete, all the objects found in these distinct
+// |Allocator|s are merged back into the main compartment lists and
+// things proceed normally.
+//
+// In Ion-generated code, we will do allocation through the
+// |Allocator| found in |ForkJoinContext| (which is obtained via TLS).
+// Also, no write barriers are emitted. Conceptually, we should never
+// need a write barrier because we only permit writes to objects that
+// are newly allocated, and such objects are always black (to use
+// incremental GC terminology). However, to be safe, we also block
+// upon entering a parallel section to ensure that any concurrent
+// marking or incremental GC has completed.
+//
+// In the future, it should be possible to lift the restriction that
+// we must block until incremental GC has completed and also to permit
+// GC during parallel execution. But we're not there yet.
+//
+// Load balancing (work stealing):
+//
+// The ForkJoin job is dynamically divided into a fixed number of slices,
+// and is submitted for parallel execution in the pool. When the number
+// of slices is big enough (typically greater than the number of workers
+// in the pool) and the workload is unbalanced, each worker thread
+// will perform load balancing through work stealing. The number
+// of slices is computed by the self-hosted function |ComputeNumSlices|,
+// which can be used to determine how many slices will be executed by the
+// runtime for an array of the given size.
+//
+// Current Limitations:
+//
+// - The API does not support recursive or nested use. That is, the
+//   JavaScript function given to |ForkJoin| should not itself invoke
+//   |ForkJoin()|. Instead, use the intrinsic |InParallelSection()| to
+//   check for recursive use and execute a sequential fallback, as in
+//   the sketch below.
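+//
+//   A hedged sketch of such a guard (|MapPar| and |MapSeq| are illustrative
+//   names, not the actual self-hosted functions):
+//
+//       function MapPar(array, kernel, mode) {
+//         if (InParallelSection())
+//           return MapSeq(array, kernel);  // nested use: run sequentially
+//         return ForkJoin(kernel, 0, ComputeNumSlices(array.length), mode);
+//       }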
+//
+///////////////////////////////////////////////////////////////////////////
+
+namespace js {
+
+class ForkJoinActivation : public Activation
+{
+    uint8_t *prevIonTop_;
+
+    // We ensure that incremental GC is finished before we enter into a fork
+    // join section, but the runtime/zone might still be marked as needing
+    // barriers due to being in the middle of verifying barriers. Pause
+    // verification during the fork join section.
+    gc::AutoStopVerifyingBarriers av_;
+
+  public:
+    ForkJoinActivation(JSContext *cx);
+    ~ForkJoinActivation();
+};
+
+class ForkJoinContext;
+
+bool ForkJoin(JSContext *cx, CallArgs &args);
+
+struct IonLIRTraceData {
+    uint32_t blockIndex;
+    uint32_t lirIndex;
+    uint32_t execModeInt;
+    const char *lirOpName;
+    const char *mirOpName;
+    JSScript *script;
+    jsbytecode *pc;
+};
+
+///////////////////////////////////////////////////////////////////////////
+// Bailout tracking
+
+enum ParallelBailoutCause {
+    ParallelBailoutNone,
+
+    // Compiler returned Method_Skipped.
+    ParallelBailoutCompilationSkipped,
+
+    // Compiler returned Method_CantCompile.
+    ParallelBailoutCompilationFailure,
+
+    // The periodic interrupt failed, which can mean that another thread
+    // canceled execution, the user interrupted us, etc.
+    ParallelBailoutInterrupt,
+
+    // An IC update failed.
+    ParallelBailoutFailedIC,
+
+    // The heap-busy flag was set during an interrupt.
+    ParallelBailoutHeapBusy,
+
+    ParallelBailoutMainScriptNotPresent,
+    ParallelBailoutCalledToUncompiledScript,
+    ParallelBailoutIllegalWrite,
+    ParallelBailoutAccessToIntrinsic,
+    ParallelBailoutOverRecursed,
+    ParallelBailoutOutOfMemory,
+    ParallelBailoutUnsupported,
+    ParallelBailoutUnsupportedVM,
+    ParallelBailoutUnsupportedStringComparison,
+    ParallelBailoutRequestedGC,
+    ParallelBailoutRequestedZoneGC,
+};
+
+struct ParallelBailoutTrace {
+    JSScript *script;
+    jsbytecode *bytecode;
+};
+
+// See the "Bailout tracing and recording" section in the comment above.
+struct ParallelBailoutRecord {
+    JSScript *topScript;
+    ParallelBailoutCause cause;
+
+    // Eventually we will support deeper traces,
+    // but for now we gather at most a single frame.
+    static const uint32_t MaxDepth = 1;
+    uint32_t depth;
+    ParallelBailoutTrace trace[MaxDepth];
+
+    void init(JSContext *cx);
+    void reset(JSContext *cx);
+    void setCause(ParallelBailoutCause cause,
+                  JSScript *outermostScript = nullptr,  // inliner (if applicable)
+                  JSScript *currentScript = nullptr,    // inlinee (if applicable)
+                  jsbytecode *currentPc = nullptr);
+    void updateCause(ParallelBailoutCause cause,
+                     JSScript *outermostScript,
+                     JSScript *currentScript,
+                     jsbytecode *currentPc);
+    void addTrace(JSScript *script,
+                  jsbytecode *pc);
+};
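+
+// As a hedged illustration (not a verbatim call site), a VM function that
+// encounters an unsupported operation might record its bailout like so,
+// given a |ForkJoinContext *cx| and the current script and pc:
+//
+//     cx->bailoutRecord->setCause(ParallelBailoutUnsupportedVM,
+//                                 nullptr, script, pc);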
+
+struct ForkJoinShared;
+
+class ForkJoinContext : public ThreadSafeContext
+{
+  public:
+    // Bailout record used to record the reason this thread stopped executing.
+    ParallelBailoutRecord *const bailoutRecord;
+
+#ifdef DEBUG
+    // Records the last instruction to execute on this thread.
+    IonLIRTraceData traceData;
+
+    // The maximum worker id.
+    uint32_t maxWorkerId;
+#endif
+
+    // When we run a par operation like mapPar, we create an out pointer
+    // into a specific region of the destination buffer. Even though the
+    // destination buffer is not thread-local, it is permissible to write into
+    // it via the handles provided. These two fields identify the memory
+    // region where writes are allowed so that the write guards can test for
+    // it.
+    //
+    // Note: we only permit writes into the *specific region* that the user
+    // is supposed to write. Normally, they only have access to this region
+    // anyhow. But due to sequential fallback it is possible for handles into
+    // other regions to escape into global variables in the sequential
+    // execution and then get accessed by later parallel sections. Thus we
+    // must be careful and ensure that the write is going through a handle
+    // into the correct *region* of the buffer.
+    uint8_t *targetRegionStart;
+    uint8_t *targetRegionEnd;
+
+    ForkJoinContext(PerThreadData *perThreadData, ThreadPoolWorker *worker,
+                    Allocator *allocator, ForkJoinShared *shared,
+                    ParallelBailoutRecord *bailoutRecord);
+
+    // Get the worker id. The main thread by convention has the id of the max
+    // worker thread id + 1.
+    uint32_t workerId() const { return worker_->id(); }
+
+    // Get a slice of work for the worker associated with the context.
+    bool getSlice(uint16_t *sliceId) { return worker_->getSlice(this, sliceId); }
+
+    // True if this is the main thread, false if it is one of the parallel workers.
+    bool isMainThread() const;
+
+    // When the code would normally trigger a GC, we don't trigger it
+    // immediately but instead record that request here. This will
+    // cause |ExecuteForkJoinOp()| to invoke |TriggerGC()| or
+    // |TriggerCompartmentGC()| as appropriate once the parallel
+    // section is complete. This is done because those routines do
+    // various preparations that are not thread-safe, and because the
+    // full set of arenas is not available until the end of the
+    // parallel section.
+    void requestGC(JS::gcreason::Reason reason);
+    void requestZoneGC(JS::Zone *zone, JS::gcreason::Reason reason);
+
+    // Set the fatal flag for the next abort. Used to distinguish retry or
+    // fatal aborts from VM functions.
+    bool setPendingAbortFatal(ParallelBailoutCause cause);
+
+    // Reports an unsupported operation, returning false if we are reporting
+    // an error; otherwise the warning is dropped on the floor.
+    bool reportError(ParallelBailoutCause cause, unsigned report) {
+        if (report & JSREPORT_ERROR)
+            return setPendingAbortFatal(cause);
+        return true;
+    }
+
+    // During the parallel phase, this method should be invoked
+    // periodically, for example on every backedge, similar to the
+    // interrupt check. If it returns false, then the parallel phase
+    // has been aborted and so you should bail out. The function may
+    // also rendezvous to perform GC or do other similar things.
+    //
+    // This function is guaranteed to have no effect if
+    // runtime()->interruptPar is zero. Ion-generated code takes
+    // advantage of this by inlining the check on that flag before
+    // actually calling this function. If this function ends up
+    // getting called a lot from outside ion code, we can refactor
+    // it into an inlined version with this check that calls a slower
+    // version.
+    bool check();
+
+    // Be wary, the runtime is shared between all threads!
+    JSRuntime *runtime();
+
+    // Acquire and release the JSContext from the runtime.
+    JSContext *acquireJSContext();
+    void releaseJSContext();
+    bool hasAcquiredJSContext() const;
+
+    // Check the current state of parallel execution.
+    static inline ForkJoinContext *current();
+
+    // Initializes the thread-local state.
+    static bool initialize();
+
+    // Used in inlining GetForkJoinSlice.
+    static size_t offsetOfWorker() {
+        return offsetof(ForkJoinContext, worker_);
+    }
+
+  private:
+    friend class AutoSetForkJoinContext;
+
+    // Initialized by initialize().
+    static mozilla::ThreadLocal<ForkJoinContext*> tlsForkJoinContext;
+
+    ForkJoinShared *const shared_;
+
+    ThreadPoolWorker *worker_;
+
+    bool acquiredJSContext_;
+
+    // ForkJoinContext is allocated on the stack. It would be dangerous to GC
+    // with it live because of the GC pointer fields stored in the context.
+    JS::AutoAssertNoGC nogc_;
+};
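+
+// A minimal sketch (illustrative only) of how a worker might drain slices
+// through a |ForkJoinContext|, checking for interrupts as it goes;
+// |processSlice| is a hypothetical per-slice routine:
+//
+//     bool executeSlices(ForkJoinContext *cx) {
+//         uint16_t sliceId;
+//         while (cx->getSlice(&sliceId)) {  // false once no slices remain
+//             if (!cx->check())             // parallel phase aborted
+//                 return false;
+//             processSlice(cx, sliceId);
+//         }
+//         return true;
+//     }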
+
+// Locks a JSContext for its scope. Be very careful, because locking a
+// JSContext does *not* allow you to safely mutate the data in the
+// JSContext unless you can guarantee that any of the other threads
+// that want to access that data will also acquire the lock, which is
+// generally not the case. For example, the lock is used in the IC
+// code to allow us to atomically patch up the dispatch table, but we
+// must be aware that other threads may be reading from the table even
+// as we write to it (though they cannot be writing, since they must
+// hold the lock to write).
+class LockedJSContext
+{
+#if defined(JS_THREADSAFE) && defined(JS_ION)
+    ForkJoinContext *cx_;
+#endif
+    JSContext *jscx_;
+
+  public:
+    LockedJSContext(ForkJoinContext *cx)
+#if defined(JS_THREADSAFE) && defined(JS_ION)
+      : cx_(cx),
+        jscx_(cx->acquireJSContext())
+#else
+      : jscx_(nullptr)
+#endif
+    { }
+
+    ~LockedJSContext() {
+#if defined(JS_THREADSAFE) && defined(JS_ION)
+        cx_->releaseJSContext();
+#endif
+    }
+
+    operator JSContext *() { return jscx_; }
+    JSContext *operator->() { return jscx_; }
+};
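+
+// A hedged usage sketch: the wrapper is RAII-style, acquiring the JSContext
+// for the duration of a scope (the surrounding function is hypothetical):
+//
+//     void patchDispatchTable(ForkJoinContext *cx) {
+//         LockedJSContext ncx(cx);  // acquires; releases at end of scope
+//         // ... use ncx as a JSContext * while the lock is held ...
+//     }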
+
+bool InExclusiveParallelSection();
+
+bool ParallelTestsShouldPass(JSContext *cx);
+
+void RequestInterruptForForkJoin(JSRuntime *rt, JSRuntime::InterruptMode mode);
+
+bool intrinsic_SetForkJoinTargetRegion(JSContext *cx, unsigned argc, Value *vp);
+extern const JSJitInfo intrinsic_SetForkJoinTargetRegionInfo;
+
+bool intrinsic_ClearThreadLocalArenas(JSContext *cx, unsigned argc, Value *vp);
+extern const JSJitInfo intrinsic_ClearThreadLocalArenasInfo;
+
+///////////////////////////////////////////////////////////////////////////
+// Debug Spew
+
+namespace jit {
+    class MDefinition;
+}
+
+namespace parallel {
+
+enum ExecutionStatus {
+    // Parallel or sequential execution terminated in a fatal way; the
+    // operation failed.
+    ExecutionFatal,
+
+    // Parallel execution failed and so we fell back to sequential.
+    ExecutionSequential,
+
+    // We completed the work in sequential mode before parallel compilation
+    // completed.
+    ExecutionWarmup,
+
+    // Parallel execution was successful after some number of bailouts.
+    ExecutionParallel
+};
+
+enum SpewChannel {
+    SpewOps,
+    SpewCompile,
+    SpewBailouts,
+    NumSpewChannels
+};
+
+#if defined(DEBUG) && defined(JS_THREADSAFE) && defined(JS_ION)
+
+bool SpewEnabled(SpewChannel channel);
+void Spew(SpewChannel channel, const char *fmt, ...);
+void SpewBeginOp(JSContext *cx, const char *name);
+void SpewBailout(uint32_t count, HandleScript script, jsbytecode *pc,
+                 ParallelBailoutCause cause);
+ExecutionStatus SpewEndOp(ExecutionStatus status);
+void SpewBeginCompile(HandleScript script);
+jit::MethodStatus SpewEndCompile(jit::MethodStatus status);
+void SpewMIR(jit::MDefinition *mir, const char *fmt, ...);
+void SpewBailoutIR(IonLIRTraceData *data);
+
+#else
+
+static inline bool SpewEnabled(SpewChannel channel) { return false; }
+static inline void Spew(SpewChannel channel, const char *fmt, ...) { }
+static inline void SpewBeginOp(JSContext *cx, const char *name) { }
+static inline void SpewBailout(uint32_t count, HandleScript script,
+                               jsbytecode *pc, ParallelBailoutCause cause) { }
+static inline ExecutionStatus SpewEndOp(ExecutionStatus status) { return status; }
+static inline void SpewBeginCompile(HandleScript script) { }
+#ifdef JS_ION
+static inline jit::MethodStatus SpewEndCompile(jit::MethodStatus status) { return status; }
+static inline void SpewMIR(jit::MDefinition *mir, const char *fmt, ...) { }
+#endif
+static inline void SpewBailoutIR(IonLIRTraceData *data) { }
+
+#endif // DEBUG && JS_THREADSAFE && JS_ION
+
+} // namespace parallel
+} // namespace js
+
+/* static */ inline js::ForkJoinContext *
+js::ForkJoinContext::current()
+{
+    return tlsForkJoinContext.get();
+}
+
+namespace js {
+
+static inline bool
+InParallelSection()
+{
+    return ForkJoinContext::current() != nullptr;
+}
+
+} // namespace js
+
+#endif /* vm_ForkJoin_h */